import contextlib
import tempfile

import torch

from . import check_error, cudart

__all__ = ["init", "start", "stop", "profile"]

# Default profiler options written to the temporary config file that is passed
# to cudaProfilerInitialize along with the output file and output mode.
DEFAULT_FLAGS = [
    "gpustarttimestamp",
    "gpuendtimestamp",
    "gridsize3d",
    "threadblocksize",
    "streamid",
    "enableonstart 0",
    "conckerneltrace",
]

def init(output_file, flags=None, output_mode="key_value"):
    """Initialize the CUDA profiler via cudaProfilerInitialize (unsupported on HIP, unnecessary on CUDA 12+)."""
    rt = cudart()
    if not hasattr(rt, "cudaOutputMode"):
        raise AssertionError("HIP does not support profiler initialization!")
    if (
        hasattr(torch.version, "cuda")
        and torch.version.cuda is not None
        and int(torch.version.cuda.split(".")[0]) >= 12
    ):
        # Check https://github.com/pytorch/pytorch/pull/91118
        # cudaProfilerInitialize is no longer needed after CUDA 12
        raise AssertionError("CUDA12+ does not need profiler initialization!")
    flags = DEFAULT_FLAGS if flags is None else flags
    if output_mode == "key_value":
        output_mode_enum = rt.cudaOutputMode.KeyValuePair
    elif output_mode == "csv":
        output_mode_enum = rt.cudaOutputMode.CSV
    else:
        raise RuntimeError(
            "supported CUDA profiler output modes are: key_value and csv"
        )
    # Write the profiler options to a temporary config file and hand it, together
    # with the output file and output mode, to the CUDA runtime.
    with tempfile.NamedTemporaryFile(delete=True) as f:
        f.write(b"\n".join(flag.encode("ascii") for flag in flags))
        f.flush()
        check_error(rt.cudaProfilerInitialize(f.name, output_file, output_mode_enum))

def start():
    check_error(cudart().cudaProfilerStart())


def stop():
    check_error(cudart().cudaProfilerStop())

@contextlib.contextmanager
def profile():
    # Start profiling on entry and always stop it on exit, even if the
    # profiled region raises.
    try:
        start()
        yield
    finally:
        stop()
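

# Usage sketch (illustrative only, not part of the module API): wrap the region
# of interest so an attached CUDA profiler records just that section. Assumes a
# CUDA-enabled build; the model and input below are arbitrary placeholders, and
# init() is only needed (and only works) with the legacy profiler on CUDA < 12.
if __name__ == "__main__":
    model = torch.nn.Linear(20, 30).cuda()
    inputs = torch.randn(128, 20).cuda()

    # init("prof_out.log", output_mode="key_value")  # legacy profiler, CUDA < 12 only
    with profile():
        model(inputs)  # only this forward pass runs between ProfilerStart/Stop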