Spaces:
Sleeping
Sleeping
<html class="no-js" lang="en"> | |
<head><meta charset="utf-8"/> | |
<meta name="viewport" content="width=device-width,initial-scale=1"/> | |
<meta name="color-scheme" content="light dark"><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> | |
<link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Utilities" href="cutlass.utils.html" /><link rel="prev" title="Emitters" href="cutlass.emit.html" /> | |
<link rel="canonical" href="docs/cutlass.op.html" /> | |
<!-- Generated with Sphinx 6.1.3 and Furo 2023.03.27 --> | |
<title>Operations - CUTLASS Python</title> | |
<link rel="stylesheet" type="text/css" href="_static/pygments.css" /> | |
<link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=fad236701ea90a88636c2a8c73b44ae642ed2a53" /> | |
<link rel="stylesheet" type="text/css" href="_static/copybutton.css" /> | |
<link rel="stylesheet" type="text/css" href="_static/tabs.css" /> | |
<link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" /> | |
<style> | |
body { | |
--color-code-background: #eeffcc; | |
--color-code-foreground: black; | |
--color-brand-primary: #76B900; | |
--color-brand-content: #76B900; | |
} | |
@media not print { | |
body[data-theme="dark"] { | |
--color-code-background: #272822; | |
--color-code-foreground: #f8f8f2; | |
--color-brand-primary: #76B900; | |
--color-brand-content: #76B900; | |
} | |
@media (prefers-color-scheme: dark) { | |
body:not([data-theme="light"]) { | |
--color-code-background: #272822; | |
--color-code-foreground: #f8f8f2; | |
--color-brand-primary: #76B900; | |
--color-brand-content: #76B900; | |
} | |
} | |
} | |
</style></head> | |
<body> | |
<script> | |
document.body.dataset.theme = localStorage.getItem("theme") || "auto"; | |
</script> | |
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;"> | |
<symbol id="svg-toc" viewBox="0 0 24 24"> | |
<title>Contents</title> | |
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024"> | |
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/> | |
</svg> | |
</symbol> | |
<symbol id="svg-menu" viewBox="0 0 24 24"> | |
<title>Menu</title> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu"> | |
<line x1="3" y1="12" x2="21" y2="12"></line> | |
<line x1="3" y1="6" x2="21" y2="6"></line> | |
<line x1="3" y1="18" x2="21" y2="18"></line> | |
</svg> | |
</symbol> | |
<symbol id="svg-arrow-right" viewBox="0 0 24 24"> | |
<title>Expand</title> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right"> | |
<polyline points="9 18 15 12 9 6"></polyline> | |
</svg> | |
</symbol> | |
<symbol id="svg-sun" viewBox="0 0 24 24"> | |
<title>Light mode</title> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun"> | |
<circle cx="12" cy="12" r="5"></circle> | |
<line x1="12" y1="1" x2="12" y2="3"></line> | |
<line x1="12" y1="21" x2="12" y2="23"></line> | |
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line> | |
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line> | |
<line x1="1" y1="12" x2="3" y2="12"></line> | |
<line x1="21" y1="12" x2="23" y2="12"></line> | |
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line> | |
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line> | |
</svg> | |
</symbol> | |
<symbol id="svg-moon" viewBox="0 0 24 24"> | |
<title>Dark mode</title> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon"> | |
<path stroke="none" d="M0 0h24v24H0z" fill="none" /> | |
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" /> | |
</svg> | |
</symbol> | |
<symbol id="svg-sun-half" viewBox="0 0 24 24"> | |
<title>Auto light/dark mode</title> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" | |
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow"> | |
<path stroke="none" d="M0 0h24v24H0z" fill="none"/> | |
<circle cx="12" cy="12" r="9" /> | |
<path d="M13 12h5" /> | |
<path d="M13 15h4" /> | |
<path d="M13 18h1" /> | |
<path d="M13 9h4" /> | |
<path d="M13 6h1" /> | |
</svg> | |
</symbol> | |
</svg> | |
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation"> | |
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc"> | |
<label class="overlay sidebar-overlay" for="__navigation"> | |
<div class="visually-hidden">Hide navigation sidebar</div> | |
</label> | |
<label class="overlay toc-overlay" for="__toc"> | |
<div class="visually-hidden">Hide table of contents sidebar</div> | |
</label> | |
<div class="page"> | |
<header class="mobile-header"> | |
<div class="header-left"> | |
<label class="nav-overlay-icon" for="__navigation"> | |
<div class="visually-hidden">Toggle site navigation sidebar</div> | |
<i class="icon"><svg><use href="#svg-menu"></use></svg></i> | |
</label> | |
</div> | |
<div class="header-center"> | |
<a href="index.html"><div class="brand">CUTLASS Python</div></a> | |
</div> | |
<div class="header-right"> | |
<div class="theme-toggle-container theme-toggle-header"> | |
<button class="theme-toggle"> | |
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> | |
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg> | |
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> | |
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> | |
</button> | |
</div> | |
<label class="toc-overlay-icon toc-header-icon" for="__toc"> | |
<div class="visually-hidden">Toggle table of contents sidebar</div> | |
<i class="icon"><svg><use href="#svg-toc"></use></svg></i> | |
</label> | |
</div> | |
</header> | |
<aside class="sidebar-drawer"> | |
<div class="sidebar-container"> | |
<div class="sidebar-sticky"><a class="sidebar-brand" href="index.html"> | |
<div class="sidebar-logo-container"> | |
<img class="sidebar-logo only-light" src="_static/cutlass-logo-small.png" alt="Light Logo"/> | |
<img class="sidebar-logo only-dark" src="_static/cutlass-logo-small.png" alt="Dark Logo"/> | |
</div> | |
<span class="sidebar-brand-text">CUTLASS Python</span> | |
</a><form class="sidebar-search-container" method="get" action="search.html" role="search"> | |
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search"> | |
<input type="hidden" name="check_keywords" value="yes"> | |
<input type="hidden" name="area" value="default"> | |
</form> | |
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree"> | |
<ul> | |
<li class="toctree-l1"><a class="reference internal" href="index.html">Home</a></li> | |
</ul> | |
<p class="caption" role="heading"><span class="caption-text">Getting Started:</span></p> | |
<ul> | |
<li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a></li> | |
<li class="toctree-l1"><a class="reference internal" href="externals/00_basic_gemm.html">Getting Started</a></li> | |
<li class="toctree-l1"><a class="reference internal" href="contribute.html">Contributing</a></li> | |
</ul> | |
<p class="caption" role="heading"><span class="caption-text">Python Documentation:</span></p> | |
<ul class="current"> | |
<li class="toctree-l1 current has-children"><a class="reference internal" href="modules.html">CUTLASS Python API</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current"> | |
<li class="toctree-l2 current has-children"><a class="reference internal" href="cutlass.html">CUTLASS</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current"> | |
<li class="toctree-l3"><a class="reference internal" href="cutlass.emit.html">Emitters</a></li> | |
<li class="toctree-l3 current current-page"><a class="current reference internal" href="#">Operations</a></li> | |
<li class="toctree-l3"><a class="reference internal" href="cutlass.utils.html">Utilities</a></li> | |
</ul> | |
</li> | |
</ul> | |
</li> | |
</ul> | |
<p class="caption" role="heading"><span class="caption-text">Examples and Tutorials:</span></p> | |
<ul> | |
<li class="toctree-l1 has-children"><a class="reference internal" href="examples.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul> | |
<li class="toctree-l2"><a class="reference internal" href="externals/00_basic_gemm.html">Basic GEMM</a></li> | |
<li class="toctree-l2"><a class="reference internal" href="externals/01_epilogue.html">Epilogue</a></li> | |
<li class="toctree-l2"><a class="reference internal" href="externals/02_pytorch_extension_grouped_gemm.html">PyTorch Extension</a></li> | |
</ul> | |
</li> | |
</ul> | |
<p class="caption" role="heading"><span class="caption-text">Reference:</span></p> | |
<ul> | |
<li class="toctree-l1"><a class="reference external" href="https://github.com/NVIDIA/cutlass">GitHub</a></li> | |
</ul> | |
</div> | |
</div> | |
</div> | |
</div> | |
</aside> | |
<div class="main"> | |
<div class="content"> | |
<div class="article-container"> | |
<a href="#" class="back-to-top muted-link"> | |
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"> | |
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path> | |
</svg> | |
<span>Back to top</span> | |
</a> | |
<div class="content-icon-container"> | |
<div class="theme-toggle-container theme-toggle-content"> | |
<button class="theme-toggle"> | |
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> | |
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg> | |
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> | |
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> | |
</button> | |
</div> | |
<label class="toc-overlay-icon toc-content-icon" for="__toc"> | |
<div class="visually-hidden">Toggle table of contents sidebar</div> | |
<i class="icon"><svg><use href="#svg-toc"></use></svg></i> | |
</label> | |
</div> | |
<article role="main"> | |
<section id="operations"> | |
<h1>Operations<a class="headerlink" href="#operations" title="Permalink to this heading">#</a></h1> | |
<section id="module-cutlass.op.gemm"> | |
<span id="gemm"></span><h2>GEMM<a class="headerlink" href="#module-cutlass.op.gemm" title="Permalink to this heading">#</a></h2> | |
<p>Ease-of-use interface for constructing, compiling, and running GEMMs.</p> | |
<p>The <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> interface is meant to allow one to easily instantiate, compile, and run | |
GEMM operations in CUTLASS via Python, without specifying many configuration parameters. | |
Under the hood, the interface will select sensible default parameters for the many template | |
parameters for CUTLASS GEMMs.</p> | |
<p>Note: optimal performance is not to be expected from this interface. To achieve optimal | |
performance, one should specify and tune each configuration parameter.</p> | |
<p>The simplest example of using this interface is the following:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># A, B, C, and D are torch/numpy/cupy tensor objects</span> | |
<span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">,</span> <span class="n">C</span><span class="p">,</span> <span class="n">D</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">()</span> | |
</pre></div> | |
</div> | |
<p>One can also use the interface by specifying data types of operands at construction | |
and using different tensor objects with these data types at runtime:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># The following is shorthand for:</span> | |
<span class="c1"># cutlass.op.Gemm(element_A=torch.float32, element_B=torch.float32,</span> | |
<span class="c1"># element_C=torch.float32, element_D=torch.float32,</span> | |
<span class="c1"># element_accumulator=torch.float32,</span> | |
<span class="c1"># layout=cutlass.LayoutType.RowMajor)</span> | |
<span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="n">A0</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">B0</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">256</span><span class="p">,</span> <span class="mi">64</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">C0</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">128</span><span class="p">,</span> <span class="mi">64</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">D0</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">128</span><span class="p">,</span> <span class="mi">64</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">A0</span><span class="p">,</span> <span class="n">B0</span><span class="p">,</span> <span class="n">C0</span><span class="p">,</span> <span class="n">D0</span><span class="p">)</span> | |
<span class="n">A1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">32</span><span class="p">,</span> <span class="mi">128</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">B1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">C1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">32</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">D1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">32</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">A1</span><span class="p">,</span> <span class="n">B1</span><span class="p">,</span> <span class="n">C1</span><span class="p">,</span> <span class="n">D1</span><span class="p">)</span> | |
</pre></div> | |
</div> | |
<p>The interface additionally enables one to decouple the compilation of the underlying CUTLASS | |
kernel from its execution:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">compile</span><span class="p">()</span> | |
<span class="c1"># Do other work...</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">A0</span><span class="p">,</span> <span class="n">B0</span><span class="p">,</span> <span class="n">C0</span><span class="p">,</span> <span class="n">D0</span><span class="p">)</span> | |
<span class="c1"># Do other work...</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">A1</span><span class="p">,</span> <span class="n">B1</span><span class="p">,</span> <span class="n">C1</span><span class="p">,</span> <span class="n">D1</span><span class="p">)</span> | |
</pre></div> | |
</div> | |
<p>Elementwise activation functions are easily fused to the GEMM via the interface:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">activation</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">epilogue</span><span class="o">.</span><span class="n">relu</span> | |
</pre></div> | |
</div> | |
<p>Operations can also be run asynchronously:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="n">args</span> <span class="o">=</span> <span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">()</span> | |
<span class="c1"># Do other work...</span> | |
<span class="n">args</span><span class="o">.</span><span class="n">sync</span><span class="p">()</span> | |
</pre></div> | |
</div> | |
<dl class="py class"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm"> | |
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cutlass.op.gemm.</span></span><span class="sig-name descname"><span class="pre">Gemm</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">D</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_accumulator</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_D</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel_cc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm.html#Gemm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a 
class="headerlink" href="#cutlass.op.gemm.Gemm" title="Permalink to this definition">#</a></dt> | |
<dd><p>Bases: <a class="reference internal" href="#cutlass.op.op.OperationBase" title="cutlass.op.op.OperationBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">OperationBase</span></code></a></p> | |
<p>Constructs a <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> object.</p> | |
<p>The data types and layouts of operands A, B, and C, along with the data type of output D | |
and that used for accumulation, are bound to the <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> object throughout its lifetime – | |
these are not to be changed after a <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> has been constructed.</p> | |
<p>The constructor has optional parameters for flexibly setting these parameters. The following | |
constructors are equivalent:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Use F32 for A, B, C, D, and accumulation. All operands are row major.</span> | |
<span class="c1"># Use the generic ``element`` and ``layout`` parameters to concisely set all data types and layouts</span> | |
<span class="c1"># for operands to the same values.</span> | |
<span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="c1"># Explicitly specify the data types to use for A, B, C, and D. Use the generic ``layout``.</span> | |
<span class="n">Gemm</span><span class="p">(</span><span class="n">element_A</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">element_B</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">element_C</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> | |
<span class="n">element_D</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="c1"># Set the data types and layouts from existing tensors. Note that one can use different tensors when</span> | |
<span class="c1"># executing GEMM via the ``run()`` method than passed in here (though those passed in to ``run()`` must</span> | |
<span class="c1"># have the same data type and layout as those passed in here).</span> | |
<span class="c1"># A, B, C, and D are row-major torch.Tensor objects of type torch.float32</span> | |
<span class="n">Gemm</span><span class="p">(</span><span class="n">A</span><span class="o">=</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="o">=</span><span class="n">B</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">,</span> <span class="n">D</span><span class="o">=</span><span class="n">D</span><span class="p">)</span> | |
<span class="c1"># Use the generic ``element`` and explicitly specify the layouts to use for A, B, and C (layout of D is</span> | |
<span class="c1"># the same as that for C, at present)</span> | |
<span class="n">Gemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">layout_A</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">,</span> | |
<span class="n">layout_B</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">,</span> <span class="n">layout_C</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="c1"># Explicitly specify the data type and layout for only some of A, B, C, and D. Unspecified data types</span> | |
<span class="c1"># and layouts will inherit those passed in via the generic ``element`` and ``layout``</span> | |
<span class="n">Gemm</span><span class="p">(</span><span class="n">element_A</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">layout_B</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">,</span> | |
<span class="n">element</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
</pre></div> | |
</div> | |
<dl class="simple"> | |
<dt>The order of precedence for the setting of the data type and layout for a given operand/output is as follows:</dt><dd><ol class="arabic simple"> | |
<li><p>If the tensor type is specified (e.g., <code class="docutils literal notranslate"><span class="pre">A</span></code>), use the data type and layout inferred from this tensor</p></li> | |
<li><p>Otherwise, if the data type/layout (e.g., <code class="docutils literal notranslate"><span class="pre">element_A</span></code>, <code class="docutils literal notranslate"><span class="pre">layout_A</span></code>) is specified, use those</p></li> | |
<li><p>Otherwise, use the generic values (e.g., <code class="docutils literal notranslate"><span class="pre">element</span></code>, <code class="docutils literal notranslate"><span class="pre">layout</span></code>)</p></li> | |
</ol> | |
</dd> | |
</dl> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>cc</strong> (<em>int</em>) – compute capability of device for which kernels should be compiled. For example, if running on H100, this should be set to 90</p></li> | |
<li><p><strong>kernel_cc</strong> (<em>int</em>) – compute capability of kernels to generate. For example, if running on SM90, but desiring to use a CUTLASS 2.x-style Ampere kernel, this should be set to 80</p></li> | |
<li><p><strong>A</strong> – tensor representing data type and layout of operand A</p></li> | |
<li><p><strong>B</strong> – tensor representing data type and layout of operand B</p></li> | |
<li><p><strong>C</strong> – tensor representing data type and layout of operand C</p></li> | |
<li><p><strong>D</strong> – tensor representing data type and layout of operand D</p></li> | |
<li><p><strong>alpha</strong> – scalar parameter alpha from GEMM computation that scales the product of operands A and B</p></li> | |
<li><p><strong>beta</strong> – scalar parameter beta from GEMM operation that scales operand C</p></li> | |
<li><p><strong>element_accumulator</strong> (<em>cutlass.DataType</em>) – data type to be used in accumulation of the product of operands A and B</p></li> | |
<li><p><strong>element</strong> (<em>cutlass.DataType</em>) – generic data type to be used for operands A, B, C, D, as well as the accumulation data type</p></li> | |
<li><p><strong>layout</strong> (<em>cutlass.LayoutType</em>) – generic layout type to be used for operands A, B, C, and D</p></li> | |
<li><p><strong>element_A</strong> (<em>cutlass.DataType</em>) – data type to be used for operand A</p></li> | |
<li><p><strong>element_B</strong> (<em>cutlass.DataType</em>) – data type to be used for operand B</p></li> | |
<li><p><strong>element_C</strong> (<em>cutlass.DataType</em>) – data type to be used for operand C</p></li> | |
<li><p><strong>element_D</strong> (<em>cutlass.DataType</em>) – data type to be used for operand D</p></li> | |
<li><p><strong>layout_A</strong> (<em>cutlass.LayoutType</em>) – layout of operand A</p></li> | |
<li><p><strong>layout_B</strong> (<em>cutlass.LayoutType</em>) – layout of operand B</p></li> | |
<li><p><strong>layout_C</strong> (<em>cutlass.LayoutType</em>) – layout of operand C</p></li> | |
<li><p><strong>layout_D</strong> (<em>cutlass.LayoutType</em>) – layout of operand D</p></li> | |
</ul> | |
</dd> | |
</dl> | |
<dl class="py property"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.activation"> | |
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">activation</span></span><a class="headerlink" href="#cutlass.op.gemm.Gemm.activation" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns the type of the current activation function used</p> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.compile"> | |
<span class="sig-name descname"><span class="pre">compile</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">tile_description</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">print_module</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm.html#Gemm.compile"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm.Gemm.compile" title="Permalink to this definition">#</a></dt> | |
<dd><p>Emits and compiles the kernel currently specified. If <code class="docutils literal notranslate"><span class="pre">tile_description</span></code> and any | |
of the <code class="docutils literal notranslate"><span class="pre">alignment</span></code> parameters are set, the kernel will be chosen using this | |
tile description and alignments. Otherwise, a default tile description and alignment | |
will be used.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>tile_description</strong> (<em>cutlass.backend.TileDescription</em>) – tile description specifying shapes and operand types to use in the kernel</p></li> | |
<li><p><strong>alignment_A</strong> (<em>int</em>) – alignment of operand A</p></li> | |
<li><p><strong>alignment_B</strong> (<em>int</em>) – alignment of operand B</p></li> | |
<li><p><strong>alignment_C</strong> (<em>int</em>) – alignment of operand C</p></li> | |
<li><p><strong>print_module</strong> (<em>bool</em>) – whether to print the emitted C++ code</p></li> | |
</ul> | |
</dd> | |
<dt class="field-even">Returns<span class="colon">:</span></dt> | |
<dd class="field-even"><p>operation that was compiled</p> | |
</dd> | |
<dt class="field-odd">Return type<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>cutlass.backend.GemmOperationUniversal</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.construct"> | |
<span class="sig-name descname"><span class="pre">construct</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">tile_description</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm.html#Gemm.construct"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm.Gemm.construct" title="Permalink to this definition">#</a></dt> | |
<dd><p>Constructs a <code class="docutils literal notranslate"><span class="pre">cutlass.backend.GemmOperationUniversal</span></code> based on the input parameters and current | |
kernel specification of the <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> object.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>tile_description</strong> (<em>cutlass.backend.TileDescription</em>) – tile description specifying shapes and operand types to use in the kernel</p></li> | |
<li><p><strong>alignment_A</strong> (<em>int</em>) – alignment of operand A</p></li> | |
<li><p><strong>alignment_B</strong> (<em>int</em>) – alignment of operand B</p></li> | |
<li><p><strong>alignment_C</strong> (<em>int</em>) – alignment of operand C</p></li> | |
</ul> | |
</dd> | |
<dt class="field-even">Returns<span class="colon">:</span></dt> | |
<dd class="field-even"><p>operation that was constructed</p> | |
</dd> | |
<dt class="field-odd">Return type<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>cutlass.backend.GemmOperationUniversal</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py property"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.opclass"> | |
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">opclass</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">OpcodeClass</span></em><a class="headerlink" href="#cutlass.op.gemm.Gemm.opclass" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns the opcode class currently in use by the GEMM</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>opcode class currently in use</p> | |
</dd> | |
<dt class="field-even">Return type<span class="colon">:</span></dt> | |
<dd class="field-even"><p>cutlass.OpcodeClass</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.run"> | |
<span class="sig-name descname"><span class="pre">run</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">D</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_count</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sync</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">print_module</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm.html#Gemm.run"><span class="viewcode-link"><span 
class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm.Gemm.run" title="Permalink to this definition">#</a></dt> | |
<dd><p>Runs the kernel currently specified. If it has not already been, the kernel is emitted and | |
compiled. Tensors holding operands and outputs of the kernel are sourced either from the | |
<code class="docutils literal notranslate"><span class="pre">A</span></code>, <code class="docutils literal notranslate"><span class="pre">B</span></code>, <code class="docutils literal notranslate"><span class="pre">C</span></code>, <code class="docutils literal notranslate"><span class="pre">D</span></code>, <code class="docutils literal notranslate"><span class="pre">alpha</span></code>, and <code class="docutils literal notranslate"><span class="pre">beta</span></code> | |
parameters provided in this call, or from those | |
passed in on the construction of this object – one of the two must be specified.</p> | |
<p>By default, this call returns only once the kernel has completed. To launch the kernel | |
and immediately return, set <code class="docutils literal notranslate"><span class="pre">sync=False</span></code>. In this case, it is the responsibility of the | |
caller to synchronize the results of the kernel before attempting to access outputs | |
by calling <code class="docutils literal notranslate"><span class="pre">sync()</span></code> on the arguments returned from this call.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>A</strong> – tensor representing data type and layout of operand A</p></li> | |
<li><p><strong>B</strong> – tensor representing data type and layout of operand B</p></li> | |
<li><p><strong>C</strong> – tensor representing data type and layout of operand C</p></li> | |
<li><p><strong>D</strong> – tensor representing data type and layout of operand D</p></li> | |
<li><p><strong>alpha</strong> – scalar parameter alpha from GEMM computation that scales the product of operands A and B</p></li> | |
<li><p><strong>beta</strong> – scalar parameter beta from GEMM operation that scales operand C</p></li> | |
<li><p><strong>batch_count</strong> (<em>int</em>) – number of GEMMs in the batch</p></li> | |
<li><p><strong>sync</strong> (<em>bool</em>) – whether the call should wait for the kernel to complete before returning</p></li> | |
<li><p><strong>print_module</strong> (<em>bool</em>) – whether to print the emitted C++ code</p></li> | |
</ul> | |
</dd> | |
<dt class="field-even">Returns<span class="colon">:</span></dt> | |
<dd class="field-even"><p>arguments passed in to the kernel</p> | |
</dd> | |
<dt class="field-odd">Return type<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>cutlass.backend.GemmArguments</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py property"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.swizzling_functor"> | |
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">swizzling_functor</span></span><a class="headerlink" href="#cutlass.op.gemm.Gemm.swizzling_functor" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns the type of the swizzling functor currently being used by the GEMM</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>swizzling functor type</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm.Gemm.tile_descriptions"> | |
<span class="sig-name descname"><span class="pre">tile_descriptions</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm.html#Gemm.tile_descriptions"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm.Gemm.tile_descriptions" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns a list of valid tile descriptions for the operations</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>list of valid tile descriptions for the operations</p> | |
</dd> | |
<dt class="field-even">Return type<span class="colon">:</span></dt> | |
<dd class="field-even"><p>list</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
</dd></dl> | |
</section> | |
<section id="module-cutlass.op.gemm_grouped"> | |
<span id="grouped-gemm"></span><h2>Grouped GEMM<a class="headerlink" href="#module-cutlass.op.gemm_grouped" title="Permalink to this heading">#</a></h2> | |
<p>Ease-of-use interface for constructing, compiling, and running grouped GEMMs.</p> | |
<p>The <code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code> interface is meant to allow one to easily instantiate, compile, and run | |
grouped GEMM operations in CUTLASS via Python, without specifying many configuration parameters. | |
Under the hood, the interface will select sensible default parameters for the many template | |
parameters for CUTLASS grouped GEMMs.</p> | |
<p>Note: optimal performance is not to be expected from this interface. To achieve optimal | |
performance, one should specify and tune each configuration parameter.</p> | |
<p>The simplest example of using this interface is the following:</p> | |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># As, Bs, Cs, and Ds are torch/numpy/cupy tensor objects</span> | |
<span class="n">plan</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">GroupedGemm</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">,</span> <span class="n">layout</span><span class="o">=</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">)</span> | |
<span class="n">plan</span><span class="o">.</span><span class="n">run</span><span class="p">([</span><span class="n">A0</span><span class="p">,</span> <span class="n">A1</span><span class="p">],</span> <span class="p">[</span><span class="n">B0</span><span class="p">,</span> <span class="n">B1</span><span class="p">],</span> <span class="p">[</span><span class="n">C0</span><span class="p">,</span> <span class="n">C1</span><span class="p">],</span> <span class="p">[</span><span class="n">D0</span><span class="p">,</span> <span class="n">D1</span><span class="p">])</span> | |
</pre></div> | |
</div> | |
<dl class="py class"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm_grouped.GroupedGemm"> | |
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cutlass.op.gemm_grouped.</span></span><span class="sig-name descname"><span class="pre">GroupedGemm</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">D</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_accumulator</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">element_D</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layout_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm_grouped.html#GroupedGemm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm_grouped.GroupedGemm" title="Permalink to this definition">#</a></dt> | |
<dd><p>Bases: <a class="reference internal" href="#cutlass.op.gemm.Gemm" title="cutlass.op.gemm.Gemm"><code class="xref py py-class docutils literal notranslate"><span class="pre">Gemm</span></code></a></p> | |
<p>Constructs a <code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code> object.</p> | |
<p>The data types and layouts of operands A, B, and C, along with the data type of output D | |
and that used for accumulation, are bound to the <code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code> object throughout its lifetime – | |
these are not to be changed after a <code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code> has been constructed.</p> | |
<p>The constructor has optional parameters for flexibly setting these parameters. Please see the constructor | |
for <code class="docutils literal notranslate"><span class="pre">Gemm</span></code> for examples of these.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>cc</strong> (<em>int</em>) – compute capability of device to generate kernels for</p></li> | |
<li><p><strong>A</strong> – tensor representing data type and layout of operands A</p></li> | |
<li><p><strong>B</strong> – tensor representing data type and layout of operands B</p></li> | |
<li><p><strong>C</strong> – tensor representing data type and layout of operands C</p></li> | |
<li><p><strong>D</strong> – tensor representing data type and layout of operands D</p></li> | |
<li><p><strong>alpha</strong> – scalar parameter alpha from GEMM computation that scales the product of operands A and B</p></li> | |
<li><p><strong>beta</strong> – scalar parameter beta from GEMM operation that scales operand C</p></li> | |
<li><p><strong>element_accumulator</strong> (<em>cutlass.DataType</em>) – data type to be used in accumulation of the product of operands A and B</p></li> | |
<li><p><strong>element</strong> (<em>cutlass.DataType</em>) – generic data type to be used for operands A, B, C, D, as well as the accumulation data type</p></li> | |
<li><p><strong>layout</strong> (<em>cutlass.LayoutType</em>) – generic layout type to be used for operands A, B, C, and D</p></li> | |
<li><p><strong>element_A</strong> (<em>cutlass.DataType</em>) – data type to be used for operand A</p></li> | |
<li><p><strong>element_B</strong> (<em>cutlass.DataType</em>) – data type to be used for operand B</p></li> | |
<li><p><strong>element_C</strong> (<em>cutlass.DataType</em>) – data type to be used for operand C</p></li> | |
<li><p><strong>element_D</strong> (<em>cutlass.DataType</em>) – data type to be used for operand D</p></li> | |
<li><p><strong>layout_A</strong> (<em>cutlass.LayoutType</em>) – layout of operand A</p></li> | |
<li><p><strong>layout_B</strong> (<em>cutlass.LayoutType</em>) – layout of operand B</p></li> | |
<li><p><strong>layout_C</strong> (<em>cutlass.LayoutType</em>) – layout of operand C</p></li> | |
<li><p><strong>layout_D</strong> (<em>cutlass.LayoutType</em>) – layout of operand D</p></li> | |
</ul> | |
</dd> | |
</dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm_grouped.GroupedGemm.construct"> | |
<span class="sig-name descname"><span class="pre">construct</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">tile_description</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_A</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_B</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alignment_C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm_grouped.html#GroupedGemm.construct"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm_grouped.GroupedGemm.construct" title="Permalink to this definition">#</a></dt> | |
<dd><p>Constructs a <code class="docutils literal notranslate"><span class="pre">cutlass.backend.GemmOperationGrouped</span></code> based on the input parameters and current | |
kernel specification of the <code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code> object.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>tile_description</strong> (<em>cutlass.backend.TileDescription</em>) – tile description specifying shapes and operand types to use in the kernel</p></li> | |
<li><p><strong>alignment_A</strong> (<em>int</em>) – alignment of operand A</p></li> | |
<li><p><strong>alignment_B</strong> (<em>int</em>) – alignment of operand B</p></li> | |
<li><p><strong>alignment_C</strong> (<em>int</em>) – alignment of operand C</p></li> | |
</ul> | |
</dd> | |
<dt class="field-even">Returns<span class="colon">:</span></dt> | |
<dd class="field-even"><p>operation that was constructed</p> | |
</dd> | |
<dt class="field-odd">Return type<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>cutlass.backend.GemmOperationGrouped</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm_grouped.GroupedGemm.run"> | |
<span class="sig-name descname"><span class="pre">run</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">A</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">B</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">D</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sync</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">print_module</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/gemm_grouped.html#GroupedGemm.run"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.gemm_grouped.GroupedGemm.run" title="Permalink to this definition">#</a></dt> | |
<dd><p>Runs the kernel currently specified.</p> | |
<p>By default, this call returns only once the kernel has completed. To launch the kernel | |
and immediately return, set <code class="docutils literal notranslate"><span class="pre">sync=False</span></code>. In this case, it is the responsibility of the | |
caller to synchronize the results of the kernel before attempting to access outputs | |
by calling <code class="docutils literal notranslate"><span class="pre">sync()</span></code> on the arguments returned from this call.</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>A</strong> (<em>list</em>) – list of tensors representing data type and layout of operand A</p></li> | |
<li><p><strong>B</strong> (<em>list</em>) – list of tensors representing data type and layout of operand B</p></li> | |
<li><p><strong>C</strong> (<em>list</em>) – list of tensors representing data type and layout of operand C</p></li> | |
<li><p><strong>D</strong> (<em>list</em>) – list of tensors representing data type and layout of operand D</p></li> | |
<li><p><strong>alpha</strong> – scalar parameter alpha from GEMM computation that scales the product of operands A and B</p></li> | |
<li><p><strong>beta</strong> – scalar parameter beta from GEMM operation that scales operand C</p></li> | |
<li><p><strong>sync</strong> (<em>bool</em>) – whether the call should wait for the kernel to complete before returning</p></li> | |
<li><p><strong>print_module</strong> (<em>bool</em>) – whether to print the emitted C++ code</p></li> | |
</ul> | |
</dd> | |
<dt class="field-even">Returns<span class="colon">:</span></dt> | |
<dd class="field-even"><p>arguments passed in to the kernel</p> | |
</dd> | |
<dt class="field-odd">Return type<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>cutlass.backend.GemmGroupedArguments</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py property"> | |
<dt class="sig sig-object py" id="cutlass.op.gemm_grouped.GroupedGemm.swizzling_functor"> | |
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">swizzling_functor</span></span><a class="headerlink" href="#cutlass.op.gemm_grouped.GroupedGemm.swizzling_functor" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns the type of the swizzling functor currently being used by the GEMM</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>swizzling functor type</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
</dd></dl> | |
</section> | |
<section id="module-cutlass.op.op"> | |
<span id="operation"></span><h2>Operation<a class="headerlink" href="#module-cutlass.op.op" title="Permalink to this heading">#</a></h2> | |
<p>Base operation used for defining high-level CUTLASS operations (e.g., GEMM, Conv2d)</p> | |
<dl class="py class"> | |
<dt class="sig sig-object py" id="cutlass.op.op.OperationBase"> | |
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cutlass.op.op.</span></span><span class="sig-name descname"><span class="pre">OperationBase</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">cc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel_cc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/op.html#OperationBase"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.op.OperationBase" title="Permalink to this definition">#</a></dt> | |
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p> | |
<p>Base operation used for defining high-level CUTLASS operations (e.g., GEMM, Conv2d)</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Parameters<span class="colon">:</span></dt> | |
<dd class="field-odd"><ul class="simple"> | |
<li><p><strong>cc</strong> (<em>int</em>) – </p></li> | |
<li><p><strong>kernel_cc</strong> (<em>int</em>) – </p></li> | |
</ul> | |
</dd> | |
</dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.op.OperationBase.activations"> | |
<span class="sig-name descname"><span class="pre">activations</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/op.html#OperationBase.activations"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.op.OperationBase.activations" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns possible activation functions that can be used</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>list of activation functions that can be used</p> | |
</dd> | |
<dt class="field-even">Return type<span class="colon">:</span></dt> | |
<dd class="field-even"><p>list</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
<dl class="py method"> | |
<dt class="sig sig-object py" id="cutlass.op.op.OperationBase.swizzling_functors"> | |
<span class="sig-name descname"><span class="pre">swizzling_functors</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/cutlass/op/op.html#OperationBase.swizzling_functors"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#cutlass.op.op.OperationBase.swizzling_functors" title="Permalink to this definition">#</a></dt> | |
<dd><p>Returns possible swizzling functions that can be used</p> | |
<dl class="field-list simple"> | |
<dt class="field-odd">Returns<span class="colon">:</span></dt> | |
<dd class="field-odd"><p>list of swizzling functions that can be used</p> | |
</dd> | |
<dt class="field-even">Return type<span class="colon">:</span></dt> | |
<dd class="field-even"><p>list</p> | |
</dd> | |
</dl> | |
</dd></dl> | |
</dd></dl> | |
</section> | |
</section> | |
</article> | |
</div> | |
<footer> | |
<div class="related-pages"> | |
<a class="next-page" href="cutlass.utils.html"> | |
<div class="page-info"> | |
<div class="context"> | |
<span>Next</span> | |
</div> | |
<div class="title">Utilities</div> | |
</div> | |
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg> | |
</a> | |
<a class="prev-page" href="cutlass.emit.html"> | |
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg> | |
<div class="page-info"> | |
<div class="context"> | |
<span>Previous</span> | |
</div> | |
<div class="title">Emitters</div> | |
</div> | |
</a> | |
</div> | |
<div class="bottom-of-page"> | |
<div class="left-details"> | |
<div class="copyright"> | |
Copyright © 2023, NVIDIA | |
</div> | |
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s | |
<a href="https://github.com/pradyunsg/furo">Furo</a> | |
</div> | |
<div class="right-details"> | |
<div class="icons"> | |
<a class="muted-link " href="https://github.com/NVIDIA/cutlass" aria-label="GitHub"> | |
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16"> | |
<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path> | |
</svg> | |
</a> | |
</div> | |
</div> | |
</div> | |
</footer> | |
</div> | |
<aside class="toc-drawer"> | |
<div class="toc-sticky toc-scroll"> | |
<div class="toc-title-container"> | |
<span class="toc-title"> | |
On this page | |
</span> | |
</div> | |
<div class="toc-tree-container"> | |
<div class="toc-tree"> | |
<ul> | |
<li><a class="reference internal" href="#">Operations</a><ul> | |
<li><a class="reference internal" href="#module-cutlass.op.gemm">GEMM</a><ul> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm"><code class="docutils literal notranslate"><span class="pre">Gemm</span></code></a><ul> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.activation"><code class="docutils literal notranslate"><span class="pre">Gemm.activation</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.compile"><code class="docutils literal notranslate"><span class="pre">Gemm.compile()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.construct"><code class="docutils literal notranslate"><span class="pre">Gemm.construct()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.opclass"><code class="docutils literal notranslate"><span class="pre">Gemm.opclass</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.run"><code class="docutils literal notranslate"><span class="pre">Gemm.run()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.swizzling_functor"><code class="docutils literal notranslate"><span class="pre">Gemm.swizzling_functor</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm.Gemm.tile_descriptions"><code class="docutils literal notranslate"><span class="pre">Gemm.tile_descriptions()</span></code></a></li> | |
</ul> | |
</li> | |
</ul> | |
</li> | |
<li><a class="reference internal" href="#module-cutlass.op.gemm_grouped">Grouped GEMM</a><ul> | |
<li><a class="reference internal" href="#cutlass.op.gemm_grouped.GroupedGemm"><code class="docutils literal notranslate"><span class="pre">GroupedGemm</span></code></a><ul> | |
<li><a class="reference internal" href="#cutlass.op.gemm_grouped.GroupedGemm.construct"><code class="docutils literal notranslate"><span class="pre">GroupedGemm.construct()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm_grouped.GroupedGemm.run"><code class="docutils literal notranslate"><span class="pre">GroupedGemm.run()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.gemm_grouped.GroupedGemm.swizzling_functor"><code class="docutils literal notranslate"><span class="pre">GroupedGemm.swizzling_functor</span></code></a></li> | |
</ul> | |
</li> | |
</ul> | |
</li> | |
<li><a class="reference internal" href="#module-cutlass.op.op">Operation</a><ul> | |
<li><a class="reference internal" href="#cutlass.op.op.OperationBase"><code class="docutils literal notranslate"><span class="pre">OperationBase</span></code></a><ul> | |
<li><a class="reference internal" href="#cutlass.op.op.OperationBase.activations"><code class="docutils literal notranslate"><span class="pre">OperationBase.activations()</span></code></a></li> | |
<li><a class="reference internal" href="#cutlass.op.op.OperationBase.swizzling_functors"><code class="docutils literal notranslate"><span class="pre">OperationBase.swizzling_functors()</span></code></a></li> | |
</ul> | |
</li> | |
</ul> | |
</li> | |
</ul> | |
</li> | |
</ul> | |
</div> | |
</div> | |
</div> | |
</aside> | |
</div> | |
</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script> | |
<script src="_static/doctools.js"></script> | |
<script src="_static/sphinx_highlight.js"></script> | |
<script src="_static/scripts/furo.js"></script> | |
<script src="_static/clipboard.min.js"></script> | |
<script src="_static/copybutton.js"></script> | |
<script src="_static/tabs.js"></script> | |
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> | |
</body> | |
</html> |