|
|
|
|
|
""" The Python Hipify script. |
|
## |
|
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. |
|
# 2017-2018 Advanced Micro Devices, Inc. and |
|
# Facebook Inc. All rights reserved. |
|
# |
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
|
# of this software and associated documentation files (the "Software"), to deal |
|
# in the Software without restriction, including without limitation the rights |
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
# copies of the Software, and to permit persons to whom the Software is |
|
# furnished to do so, subject to the following conditions: |
|
# |
|
# The above copyright notice and this permission notice shall be included in |
|
# all copies or substantial portions of the Software. |
|
# |
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
# THE SOFTWARE. |
|
""" |
|
import argparse |
|
import fnmatch |
|
import re |
|
import shutil |
|
import sys |
|
import os |
|
|
|
from . import constants |
|
from .cuda_to_hip_mappings import CUDA_TO_HIP_MAPPINGS |
|
from .cuda_to_hip_mappings import MATH_TRANSPILATIONS |
|
|
|
from typing import Dict, List, Iterator, Optional |
|
from collections.abc import Mapping, Iterable |
|
from enum import Enum |
|
|
|
class CurrentState(Enum):
    """Lifecycle phase of a source file in the hipification pipeline."""
    INITIALIZED = 1  # registered in HIPIFY_FINAL_RESULT, output not yet produced
    DONE = 2  # processing finished: written out, skipped, or ignored
|
|
|
class HipifyResult:
    """Per-file record of hipification progress and outcome."""

    def __init__(self, current_state, hipified_path):
        # Pipeline phase (CurrentState.INITIALIZED / CurrentState.DONE).
        self.current_state = current_state
        # Path of the hipified output file (None when nothing was produced).
        self.hipified_path = hipified_path
        # Human-readable status tag, e.g. "[ok]" or "[skipped, ...]".
        self.status = ""

    def __str__(self):
        return (f"HipifyResult:: current_state: {self.current_state}, "
                f"hipified_path : {self.hipified_path}, status: {self.status}")
|
|
|
HipifyFinalResult = Dict[str, HipifyResult]
HIPIFY_C_BREADCRUMB = "// !!! This is a file automatically generated by hipify!!!\n"
# Global registry of results, keyed by absolute input-file path.
HIPIFY_FINAL_RESULT: HipifyFinalResult = {}


# This dictionary provides the mapping from PyTorch kernel template types
# to their actual types.
PYTORCH_TEMPLATE_MAP = {"Dtype": "scalar_t", "T": "scalar_t"}

# Public API of this module.
# Fix: the original list contained 'is_caffe2_gpu_file' twice; the
# duplicate entry has been removed (the exported set is unchanged).
__all__ = ['InputError', 'openf', 'bcolors', 'GeneratedFileCleaner', 'match_extensions', 'matched_files_iter',
           'preprocess_file_and_save_result', 'compute_stats', 'add_dim3', 'processKernelLaunches', 'find_closure_group',
           'find_bracket_group', 'find_parentheses_group', 'replace_math_functions', 'hip_header_magic', 'replace_extern_shared',
           'get_hip_file_path', 'is_out_of_place', 'is_pytorch_file', 'is_cusparse_file', 'is_special_file',
           'is_caffe2_gpu_file', 'Trie', 'preprocessor', 'file_specific_replacement', 'file_add_header',
           'fix_static_global_kernels', 'extract_arguments', 'str2bool', 'CurrentState', 'HipifyResult', 'hipify']
|
|
|
|
|
class InputError(Exception):
    """Raised when hipify is handed malformed or unusable input."""

    def __init__(self, message):
        super().__init__(message)
        # Retained so __str__ can render the prefixed form below.
        self.message = message

    def __str__(self):
        return "Input error: {}".format(self.message)
|
|
|
|
|
def openf(filename, mode):
    """Open *filename* in *mode*, silently ignoring decode errors
    (CUDA sources occasionally contain bytes that are invalid in the
    default text encoding)."""
    return open(file=filename, mode=mode, errors='ignore')
|
|
|
|
|
|
|
class bcolors:
    """ANSI terminal escape sequences used to colorize console output."""
    HEADER = '\033[95m'  # bright magenta
    OKBLUE = '\033[94m'  # bright blue
    OKGREEN = '\033[92m'  # bright green
    WARNING = '\033[93m'  # bright yellow
    FAIL = '\033[91m'  # bright red
    ENDC = '\033[0m'  # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GeneratedFileCleaner:
    """Context Manager to clean up generated files.

    Files and directories created through :meth:`open` / :meth:`makedirs`
    are tracked and, unless ``keep_intermediates`` is set, removed again
    when the context exits.
    """

    def __init__(self, keep_intermediates=False):
        self.keep_intermediates = keep_intermediates
        self.files_to_clean = set()
        self.dirs_to_clean = []

    def __enter__(self):
        return self

    def open(self, fn, *args, **kwargs):
        """Like builtin open(), but remembers files that did not exist before."""
        if not os.path.exists(fn):
            self.files_to_clean.add(os.path.abspath(fn))
        return open(fn, *args, **kwargs)

    def makedirs(self, dn, exist_ok=False):
        """Like os.makedirs(), but remembers every directory it creates."""
        parent_dir, leaf = os.path.split(dn)
        if not leaf:
            # Path ended in a separator; split once more to get a real leaf.
            parent_dir, leaf = os.path.split(parent_dir)
        if parent_dir and leaf and not os.path.exists(parent_dir):
            self.makedirs(parent_dir, exist_ok=True)
        if not os.path.isdir(dn) or not exist_ok:
            os.mkdir(dn)
            self.dirs_to_clean.append(os.path.abspath(dn))

    def __exit__(self, exc_type, exc_value, exc_tb):
        if self.keep_intermediates:
            return
        for tracked_file in self.files_to_clean:
            os.unlink(tracked_file)
        # Reverse creation order so children are removed before parents.
        for tracked_dir in reversed(self.dirs_to_clean):
            os.rmdir(tracked_dir)
|
|
|
|
|
def match_extensions(filename: str, extensions: Iterable) -> bool:
    """Return True if *filename* ends with any extension in *extensions*."""
    for ext in extensions:
        if filename.endswith(ext):
            return True
    return False
|
|
|
|
|
def _fnmatch(filepath, patterns): |
|
return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns) |
|
|
|
|
|
def matched_files_iter(
        root_path: str,
        includes: Iterable = (),
        ignores: Iterable = (),
        extensions: Iterable = (),
        out_of_place_only: bool = False,
        is_pytorch_extension: bool = False) -> Iterator[str]:
    """Yield absolute paths under *root_path* that match *includes* and
    *extensions* and are not excluded by *ignores*.

    A path listed verbatim in *includes* is accepted even when its extension
    is not in *extensions*.  Unless *is_pytorch_extension* is set, only files
    classified as PyTorch or caffe2-GPU files are yielded, optionally further
    restricted to out-of-place hipification targets.
    """
    # Paths named verbatim in `includes` bypass the extension filter below.
    exact_matches = set(includes)

    for (abs_dirpath, dirs, filenames) in os.walk(root_path, topdown=True):
        rel_dirpath = os.path.relpath(abs_dirpath, root_path)
        if rel_dirpath == '.':
            # Prune heavy / irrelevant top-level directories.  With
            # topdown=True, mutating `dirs` in place stops os.walk from
            # descending into the removed entries.
            if ".git" in dirs:
                dirs.remove(".git")
            if "build" in dirs:
                dirs.remove("build")
            if "third_party" in dirs:
                dirs.remove("third_party")
                # nvfuser is the one third_party subtree still visited.
                dirs.append("third_party/nvfuser")
        for filename in filenames:
            filepath = os.path.join(abs_dirpath, filename)
            rel_filepath = os.path.join(rel_dirpath, filename)
            # Extension filter applies UNLESS the full path was listed
            # verbatim in `includes` (exact_matches).
            if (
                _fnmatch(filepath, includes)
                and (not _fnmatch(filepath, ignores))
                and (match_extensions(filepath, extensions) or filepath in exact_matches)
            ):
                if not is_pytorch_extension:
                    # PyTorch extensions skip the classification checks.
                    if not is_pytorch_file(rel_filepath) and not is_caffe2_gpu_file(rel_filepath):
                        continue
                    if out_of_place_only and not is_out_of_place(rel_filepath):
                        continue
                yield filepath
|
|
|
|
|
def preprocess_file_and_save_result(
        output_directory: str,
        filepath: str,
        all_files: Iterable,
        header_include_dirs: Iterable,
        stats: Dict[str, List],
        hip_clang_launch: bool,
        is_pytorch_extension: bool,
        clean_ctx: GeneratedFileCleaner,
        show_progress: bool) -> None:
    """Run preprocessor() on *filepath* and record the outcome in the global
    HIPIFY_FINAL_RESULT table, keyed by the file's absolute input path.
    """
    fin_path = os.path.abspath(os.path.join(output_directory, filepath))
    # Register the file as in-progress BEFORE preprocessing so that recursive
    # hipification of included headers (see preprocessor's mk_repl) can detect
    # files already being handled.
    hipify_result = HipifyResult(current_state=CurrentState.INITIALIZED, hipified_path=fin_path)
    HIPIFY_FINAL_RESULT[fin_path] = hipify_result
    result = preprocessor(output_directory, filepath, all_files, header_include_dirs, stats,
                          hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress)

    # Report progress; files whose status contains "ignored" are not shown.
    if show_progress and "ignored" not in result.status:
        print(
            fin_path, "->",
            result.hipified_path, result.status, flush=True)

    HIPIFY_FINAL_RESULT[fin_path] = result
|
|
|
|
|
def compute_stats(stats):
    """Print a summary of the hipification run: the set of distinct
    unsupported CUDA calls encountered and the number of kernel launches
    that were rewritten."""
    distinct_unsupported = {call for (call, _path) in stats["unsupported_calls"]}

    # Distinct unsupported calls.
    print(f"Total number of unsupported CUDA function calls: {len(distinct_unsupported):d}")

    # Print the list of unsupported calls.
    print(", ".join(distinct_unsupported))

    # Kernel launch statistics.
    print(f"\nTotal number of replaced kernel launches: {len(stats['kernel_launches']):d}")
|
|
|
|
|
def add_dim3(kernel_string, cuda_kernel):
    '''adds dim3() to the second and third arguments in the kernel launch'''
    # NOTE(review): the scanner below wraps the first TWO launch-config
    # arguments found between <<< and >>> (grid and block dims) in dim3(...);
    # confirm the docstring's "second and third" counts the kernel name first.
    count = 0  # index of the launch-config argument currently being scanned
    closure = 0  # parenthesis nesting depth, so commas inside calls are skipped
    kernel_string = kernel_string.replace("<<<", "").replace(">>>", "")
    arg_locs: List[Dict[str, int]] = [{} for _ in range(2)]
    arg_locs[count]['start'] = 0
    for ind, c in enumerate(kernel_string):
        if count > 1:
            break
        if c == "(":
            closure += 1
        elif c == ")":
            closure -= 1
        # A top-level comma (or the end of the string) terminates an argument.
        if (c == "," or ind == len(kernel_string) - 1) and closure == 0:
            # `(c != ",")` adds 1 only when terminated by end-of-string, so the
            # final character is included in that case.
            arg_locs[count]['end'] = ind + (c != ",")
            count += 1
            if count < 2:
                arg_locs[count]['start'] = ind + 1

    # Raw slices keep the original surrounding whitespace/newlines intact.
    first_arg_raw = kernel_string[arg_locs[0]['start']:arg_locs[0]['end'] + 1]
    second_arg_raw = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']]

    # Cleaned copies drop newlines and flanking spaces before wrapping.
    first_arg_clean = kernel_string[arg_locs[0]['start']:arg_locs[0]['end']].replace("\n", "").strip(" ")
    second_arg_clean = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']].replace("\n", "").strip(" ")

    first_arg_dim3 = f"dim3({first_arg_clean})"
    second_arg_dim3 = f"dim3({second_arg_clean})"

    # Splice the dim3-wrapped arguments back while preserving original spacing.
    first_arg_raw_dim3 = first_arg_raw.replace(first_arg_clean, first_arg_dim3)
    second_arg_raw_dim3 = second_arg_raw.replace(second_arg_clean, second_arg_dim3)
    cuda_kernel = cuda_kernel.replace(first_arg_raw + second_arg_raw, first_arg_raw_dim3 + second_arg_raw_dim3)
    return cuda_kernel
|
|
|
|
|
# Matches an indented "detail::" qualifier (the regex 'detail?' also tolerates
# "detai") followed by a backslash line-continuation; processKernelLaunches()
# collapses the match back to "<indent>detail::" on a single line.
RE_KERNEL_LAUNCH = re.compile(r'([ ]+)(detail?)::[ ]+\\\n[ ]+')
|
|
|
|
|
def processKernelLaunches(string, stats):
    """ Replace the CUDA style Kernel launches with the HIP style kernel launches."""
    # Collapse "detail::\<line-continuation>" so kernel names are on one line.
    string = RE_KERNEL_LAUNCH.sub(lambda inp: f"{inp.group(1)}{inp.group(2)}::", string)

    def grab_method_and_template(in_kernel):
        # Positions of the kernel name, template args and the <<<...>>> group.
        pos = {
            "kernel_launch": {"start": in_kernel["start"], "end": in_kernel["end"]},
            "kernel_name": {"start": -1, "end": -1},
            "template": {"start": -1, "end": -1}
        }

        # Balance counter for template angle brackets.
        count = {"<>": 0}

        # Scanner states for the backwards walk below.
        START = 0
        AT_TEMPLATE = 1
        AFTER_TEMPLATE = 2
        AT_KERNEL_NAME = 3

        status = START

        # Walk backwards from just before the "<<<" token.
        for i in range(pos["kernel_launch"]["start"] - 1, -1, -1):
            char = string[i]

            # Handle template arguments (e.g. kernel<T><<<...>>>).
            if status in (START, AT_TEMPLATE):
                if char == ">":
                    if status == START:
                        status = AT_TEMPLATE
                        pos["template"]["end"] = i
                    count["<>"] += 1

                if char == "<":
                    count["<>"] -= 1
                    if count["<>"] == 0 and (status == AT_TEMPLATE):
                        pos["template"]["start"] = i
                        status = AFTER_TEMPLATE

            # Handle the kernel name itself.
            if status != AT_TEMPLATE:
                if string[i].isalnum() or string[i] in {'(', ')', '_', ':', '#'}:
                    if status != AT_KERNEL_NAME:
                        status = AT_KERNEL_NAME
                        pos["kernel_name"]["end"] = i

                    # Case: kernel name reaches the start of the string.
                    if i == 0:
                        pos["kernel_name"]["start"] = 0

                        return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])]

                else:
                    # First non-identifier char ends the kernel name.
                    if status == AT_KERNEL_NAME:
                        pos["kernel_name"]["start"] = i

                        return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])]

    def find_kernel_bounds(string):
        """Finds the starting and ending points for all kernel launches in the string."""
        kernel_end = 0
        kernel_positions = []

        # Scan for each "<<<"..." >>>" pair in turn.
        while string.find("<<<", kernel_end) != -1:
            # Get the position of the kernel launch.
            kernel_start = string.find("<<<", kernel_end)

            # Find the closing ">>>"; find() returns -1 on failure, so +3 <= 0
            # signals an unterminated launch.
            kernel_end = string.find(">>>", kernel_start) + 3
            if kernel_end <= 0:
                raise InputError("no kernel end found")

            kernel_positions.append({"start": kernel_start, "end": kernel_end,
                                     "group": string[kernel_start: kernel_end]})

        return kernel_positions

    def mask_comments(string):
        """Replace the contents of comments and string literals with 'x' so the
        bounds scan ignores "<<<" inside them; indices still line up with the
        original string."""
        in_comment = ''  # one of '', '//', '/*', '"'
        prev_c = ''
        new_string = ''
        for c in string:
            if in_comment == '':
                # Not in a comment or string literal: look for one starting.
                if c == '/' and prev_c == '/':
                    in_comment = '//'
                elif c == '*' and prev_c == '/':
                    in_comment = '/*'
                elif c == '"' and prev_c != '\\' and prev_c != "'":
                    in_comment = '"'
            elif in_comment == '//':
                # Line comment ends at EOL.
                if c == '\r' or c == '\n':
                    in_comment = ''
            elif in_comment == '/*':
                # Block comment ends at '*/'.
                if c == '/' and prev_c == '*':
                    in_comment = ''
            elif in_comment == '"':
                # String literal ends at an unescaped '"'.
                if c == '"' and prev_c != '\\':
                    in_comment = ''
            prev_c = c
            if in_comment == '':
                new_string += c
            else:
                new_string += 'x'
        return new_string

    # Locate launches on the comment-masked copy; positions are valid for the
    # real string because masking preserves length.
    get_kernel_positions = list(find_kernel_bounds(mask_comments(string)))
    output_string = string

    # Replace each kernel launch.
    for kernel in get_kernel_positions:
        # Get kernel components.
        params = grab_method_and_template(kernel)

        # Find the parenthesis that opens the argument list after <<<...>>>.
        parenthesis = string.find("(", kernel["end"])

        # Extract the full CUDA kernel expression up to and including "(".
        cuda_kernel = string[params[0]["start"]:parenthesis + 1]
        kernel_string = string[kernel['start']:kernel['end']]
        # params[1] is the template span; end == -1 means no template present.
        end_param_index = 0 if params[1]['end'] == -1 else 1
        kernel_name_with_template = string[params[0]['start']:params[end_param_index]['end'] + 1]
        cuda_kernel_dim3 = add_dim3(kernel_string, cuda_kernel)

        # Count launch-config params so missing ones (shared mem, stream) can
        # be padded with ", 0" to reach hipLaunchKernelGGL's 4 config slots.
        num_klp = len(extract_arguments(0, kernel["group"].replace("<<<", "(").replace(">>>", ")")))

        hip_kernel = "hipLaunchKernelGGL(" + cuda_kernel_dim3[0:-1].replace(
            ">>>", ", 0" * (4 - num_klp) + ">>>").replace("<<<", ", ").replace(
            ">>>", ", ").replace(kernel_name_with_template, "(" + kernel_name_with_template + ")")

        # Replace the CUDA launch with the HIP launch.
        output_string = output_string.replace(cuda_kernel, hip_kernel)

        # Update the statistics.
        stats["kernel_launches"].append(hip_kernel)

    return output_string
|
|
|
|
|
def find_closure_group(input_string, start, group):
    """Generalization for finding a balancing closure group

    if group = ["(", ")"], then finds the first balanced parentheses.
    if group = ["{", "}"], then finds the first balanced bracket.

    Given an input string, a starting position in the input string, and the group type,
    find_closure_group returns the positions of group[0] and group[1] as a tuple.

    Example:
        >>> find_closure_group("(hi)", 0, ["(", ")"])
        (0, 3)
    """
    opener, closer = group
    depth = 0          # current nesting depth (0 until the first opener)
    open_pos = -1      # index of the first opener seen

    for idx in range(start, len(input_string)):
        ch = input_string[idx]
        if ch == opener:
            if depth == 0:
                open_pos = idx
            depth += 1
        elif ch == closer and depth > 0:
            depth -= 1
            if depth == 0:
                return open_pos, idx

    # No balanced group found after `start`.
    return None, None
|
|
|
|
|
def find_bracket_group(input_string, start):
    """Finds the first balanced curly-brace group ("{" ... "}")."""
    return find_closure_group(input_string, start, group=["{", "}"])
|
|
|
|
|
def find_parentheses_group(input_string, start):
    """Finds the first balanced parentheses group ("(" ... ")")."""
    return find_closure_group(input_string, start, group=["(", ")"])
|
|
|
|
|
# Matches C-style "assert(" invocations.  NOTE(review): not referenced
# anywhere in the visible code of this module; presumably kept for
# backward compatibility — confirm before removing.
RE_ASSERT = re.compile(r"\bassert[ ]*\(")
|
|
|
|
|
def replace_math_functions(input_string):
    """FIXME: Temporarily replace std:: invocations of math functions with
    non-std:: versions to prevent linker errors.

    NOTE: This can lead to correctness issues when running tests, since the
    correct version of the math function (exp/expf) might not get called.
    Plan is to remove this function once HIP supports std:: math function
    calls inside device code.
    """
    result = input_string
    # Each mapping entry rewrites "<src>(" to "<dst>(" so only call sites
    # (name immediately followed by an open paren) are touched.
    for src_call, dst_call in MATH_TRANSPILATIONS.items():
        result = result.replace(f'{src_call}(', f'{dst_call}(')

    return result
|
|
|
|
|
# Matches "__syncthreads(" calls, optionally prefixed by "::" scoping.
RE_SYNCTHREADS = re.compile(r":?:?\b(__syncthreads)\b(\w*\()")


def hip_header_magic(input_string):
    """If the file makes kernel builtin calls and does not include the cuda_runtime.h header,
    then automatically add an #include to match the "magic" includes provided by NVCC.
    TODO:
        Update logic to ignore cases where the cuda_runtime.h is included by another file.
    """
    output_string = input_string

    # Nothing to do if a HIP runtime header is already included.
    headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"]
    if any(re.search(fr'#include ("{ext}"|<{ext}>)', output_string) for ext in headers):
        return output_string

    # Fix: the original accumulated booleans into a variable annotated `int`
    # via `+=`; an explicit boolean `or`-chain states the intent directly and
    # yields the identical truthiness.
    has_device_logic = (
        "hipLaunchKernelGGL" in output_string
        or "__global__" in output_string
        or "__shared__" in output_string
        or RE_SYNCTHREADS.search(output_string) is not None
    )

    # Prepend the HIP runtime header when device code is detected.
    if has_device_logic:
        output_string = '#include "hip/hip_runtime.h"\n' + input_string

    return output_string
|
|
|
|
|
# Matches "extern [qualifier] __shared__ <type> <name>[];" declarations.
RE_EXTERN_SHARED = re.compile(r"extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;")


def replace_extern_shared(input_string):
    """Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead.
    https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__
    Example:
        "extern __shared__ char smemChar[];" => "HIP_DYNAMIC_SHARED( char, smemChar)"
        "extern __shared__ unsigned char smem[];" => "HIP_DYNAMIC_SHARED( unsigned char, smem)"
    """
    output_string = input_string
    # group(1): optional qualifier, group(2): element type, group(3): array name.
    output_string = RE_EXTERN_SHARED.sub(
        lambda inp: f"HIP_DYNAMIC_SHARED({inp.group(1) or ''} {inp.group(2)}, {inp.group(3)})", output_string)

    return output_string
|
|
|
|
|
def get_hip_file_path(rel_filepath, is_pytorch_extension=False):
    """
    Returns the new name of the hipified file
    """
    # In-place hipification (outside extensions) keeps the original name.
    assert not os.path.isabs(rel_filepath)
    if not is_pytorch_extension and not is_out_of_place(rel_filepath):
        return rel_filepath

    dirpath, filename = os.path.split(rel_filepath)
    root, ext = os.path.splitext(filename)

    # Renaming scheme applied below, in order:
    #   .cu -> .hip extension;
    #   cuda/CUDA/THC substrings in the directory path -> hip/HIP/THH;
    #   cuda/CUDA in the file stem -> hip/HIP;
    #   THC in the stem -> THH (except under caffe2/core);
    #   if nothing about the directory changed (non-extension build), the
    #   output goes into a "hip/" subdirectory;
    #   if NOTHING changed at all for an extension build, "_hip" is appended
    #   to the stem so input and output never collide.

    if ext == '.cu':
        ext = '.hip'

    orig_filename = filename
    orig_dirpath = dirpath

    dirpath = dirpath.replace('cuda', 'hip')
    dirpath = dirpath.replace('CUDA', 'HIP')
    dirpath = dirpath.replace('THC', 'THH')

    root = root.replace('cuda', 'hip')
    root = root.replace('CUDA', 'HIP')

    # caffe2/core THC files keep their stem unchanged.
    if dirpath != "caffe2/core":
        root = root.replace('THC', 'THH')

    # Non-extension build with an unchanged directory: nest under "hip/".
    if not is_pytorch_extension and dirpath == orig_dirpath:
        dirpath = os.path.join(dirpath, 'hip')

    # Extension build with a completely unchanged path: suffix the stem.
    if is_pytorch_extension and dirpath == orig_dirpath and (root + ext) == orig_filename:
        root = root + "_hip"

    return os.path.join(dirpath, root + ext)
|
|
|
|
|
def is_out_of_place(rel_filepath):
    """Return True when *rel_filepath* should be hipified into a separate
    output file; torch/, nvfuser and autograd-template sources are hipified
    in place."""
    assert not os.path.isabs(rel_filepath)
    in_place_prefixes = (
        "torch/",
        "third_party/nvfuser/",
        "tools/autograd/templates/",
    )
    return not rel_filepath.startswith(in_place_prefixes)
|
|
|
|
|
|
|
def is_pytorch_file(rel_filepath):
    """Return True when *rel_filepath* belongs to the PyTorch (as opposed to
    caffe2) source tree; aten/src/ATen/core/ is explicitly excluded."""
    assert not os.path.isabs(rel_filepath)
    if rel_filepath.startswith("aten/"):
        return not rel_filepath.startswith("aten/src/ATen/core/")
    pytorch_prefixes = (
        "torch/",
        "third_party/nvfuser/",
        "tools/autograd/templates/",
    )
    return rel_filepath.startswith(pytorch_prefixes)
|
|
|
|
|
def is_cusparse_file(rel_filepath):
    """Return True for PyTorch files whose path mentions "sparse"
    (case-insensitive); these use the cuSPARSE-specific mappings."""
    return is_pytorch_file(rel_filepath) and "sparse" in rel_filepath.lower()
|
|
|
|
|
def is_special_file(rel_filepath):
    """Return True for PyTorch files that need the SPECIAL mapping overrides:
    any "sparse" file, and "linalg" files other than BatchLinearAlgebraLibBlas."""
    if not is_pytorch_file(rel_filepath):
        return False
    lowered = rel_filepath.lower()
    if "sparse" in lowered:
        return True
    if "linalg" in lowered:
        # BatchLinearAlgebraLibBlas is excluded from the special handling.
        return "batchlinearalgebralibblas" not in lowered
    return False
|
|
|
def is_caffe2_gpu_file(rel_filepath):
    """Return True for caffe2-side GPU sources: anything under c10/cuda, plus
    files with 'gpu' in the name or a .cu/.cuh extension — except cudnn files."""
    assert not os.path.isabs(rel_filepath)
    if rel_filepath.startswith("c10/cuda"):
        return True
    filename = os.path.basename(rel_filepath)
    ext = os.path.splitext(filename)[1]
    if 'cudnn' in filename:
        return False
    return 'gpu' in filename or ext in ('.cu', '.cuh')
|
|
|
class TrieNode:
    """A Trie node whose children are represented as a directory of char: TrieNode.
    A special char '' represents end of word
    """

    def __init__(self):
        # Maps a single character to its child TrieNode.  The sentinel key ''
        # (mapped to True rather than a node — see Trie.add) marks that a
        # complete word terminates at this node.
        self.children = {}
|
|
|
class Trie:
    """Creates a Trie out of a list of words. The trie can be exported to a Regex pattern.
    The corresponding Regex should match much faster than a simple Regex union."""

    def __init__(self):
        """Initialize the trie with an empty root node."""
        self.root = TrieNode()

    def add(self, word):
        """Add a word to the Trie. """
        node = self.root

        for char in word:
            node.children.setdefault(char, TrieNode())
            node = node.children[char]
        # '' sentinel (value True, not a TrieNode) marks end-of-word here.
        node.children[''] = True

    def dump(self):
        """Return the root node of Trie. """
        return self.root

    def quote(self, char):
        """ Escape a char for regex. """
        return re.escape(char)

    def search(self, word):
        """Search whether word is present in the Trie.
        Returns True if yes, else return False"""
        node = self.root
        for char in word:
            if char in node.children:
                node = node.children[char]
            else:
                return False

        # Matched every char; it is a word only if the sentinel is present.
        return '' in node.children

    def _pattern(self, root):
        """Convert a Trie into a regular expression pattern"""
        node = root

        # Node whose ONLY child is the end-of-word sentinel contributes no
        # characters; signal that to the caller by returning None (consumed
        # via the TypeError trick below).
        if "" in node.children and len(node.children.keys()) == 1:
            return None

        alt = []  # multi-character alternatives
        cc = []   # single-character alternatives (collapsed into [..] class)
        q = 0     # becomes 1 when a word may END at this node
        for char in sorted(node.children.keys()):
            if isinstance(node.children[char], TrieNode):
                try:
                    recurse = self._pattern(node.children[char])
                    # When `recurse` is None (leaf child), the concatenation
                    # raises TypeError, routing the char into the cc bucket.
                    alt.append(self.quote(char) + recurse)
                except Exception:
                    cc.append(self.quote(char))
            else:
                # Non-TrieNode child is the '' sentinel: word can end here.
                q = 1
        cconly = not len(alt) > 0

        # Fold single characters into one char class (or keep a lone char).
        if len(cc) > 0:
            if len(cc) == 1:
                alt.append(cc[0])
            else:
                alt.append('[' + ''.join(cc) + ']')

        if len(alt) == 1:
            result = alt[0]
        else:
            result = "(?:" + "|".join(alt) + ")"

        # When a word ends here, the remainder of the pattern is optional.
        if q:
            if cconly:
                result += "?"
            else:
                result = f"(?:{result})?"
        return result

    def pattern(self):
        """Export the Trie to a regex pattern."""
        return self._pattern(self.root)

    def export_to_regex(self):
        """Export the Trie to a regex pattern."""
        return self._pattern(self.root)
|
|
|
# Tries and lookup tables populated from CUDA_TO_HIP_MAPPINGS below:
# one trie/map pair for caffe2 sources, one for PyTorch sources.
CAFFE2_TRIE = Trie()
CAFFE2_MAP = {}
PYTORCH_TRIE = Trie()
PYTORCH_MAP: Dict[str, object] = {}

# Overrides for "special" files (see is_special_file): consulted before
# PYTORCH_MAP by pt_special_repl() inside preprocessor().
PYTORCH_SPECIAL_MAP = {}
|
|
|
# Populate the tries/maps.  Each mapping value is (dst, *meta_data) where the
# metadata tags say which API family (caffe2 / pytorch / special) the entry
# belongs to.
for mapping in CUDA_TO_HIP_MAPPINGS:
    assert isinstance(mapping, Mapping)
    for src, value in mapping.items():
        dst = value[0]
        meta_data = value[1:]
        if constants.API_CAFFE2 not in meta_data:
            PYTORCH_TRIE.add(src)
            # If dst belongs to API_SPECIAL and src already has a generic
            # mapping, keep the generic one and store dst in the SPECIAL map
            # instead of overwriting.
            if constants.API_SPECIAL in meta_data and PYTORCH_MAP.get(src, ""):
                PYTORCH_SPECIAL_MAP[src] = dst
            else:
                PYTORCH_MAP[src] = dst
        if constants.API_PYTORCH not in meta_data and constants.API_SPECIAL not in meta_data:
            CAFFE2_TRIE.add(src)
            CAFFE2_MAP[src] = dst
# caffe2 matches bare identifiers; the pytorch pattern additionally requires
# non-word characters on both sides to avoid partial-identifier matches.
RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.export_to_regex())
RE_PYTORCH_PREPROCESSOR = re.compile(fr'(?<=\W)({PYTORCH_TRIE.export_to_regex()})(?=\W)')
|
|
|
# Header-rewriting patterns used by preprocessor():
RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"')  # #include "foo.h"
RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>')  # #include <foo.h>
RE_THC_GENERIC_FILE = re.compile(r'#define THC_GENERIC_FILE "([^"]+)"')
RE_CU_SUFFIX = re.compile(r'\.cu\b')  # ".cu" suffix (CMakeLists rewriting)
|
|
|
""" |
|
Returns a HipifyResult object with the following details: |
|
"hipified_path" : absolute path of hipified source file |
|
"status" : "ok" if hipified file was written out |
|
"skipped" if an identical hipified file already existed or hipified file couldn't be written out |
|
"ignored" if the source file was a hipified file itself or not meant to be hipified |
|
"current_state" : CurrentState.INITIALIZED if source file is first ready to be hipified |
|
CurrentState.DONE if source file is done with hipification process |
|
""" |
|
|
|
|
|
def preprocessor(
        output_directory: str,
        filepath: str,
        all_files: Iterable,
        header_include_dirs: Iterable,
        stats: Dict[str, List],
        hip_clang_launch: bool,
        is_pytorch_extension: bool,
        clean_ctx: GeneratedFileCleaner,
        show_progress: bool) -> HipifyResult:
    """ Executes the CUDA -> HIP conversion on the specified file. """
    fin_path = os.path.abspath(os.path.join(output_directory, filepath))
    # The caller (preprocess_file_and_save_result) registered this entry.
    hipify_result = HIPIFY_FINAL_RESULT[fin_path]
    if filepath not in all_files:
        hipify_result.hipified_path = None
        hipify_result.status = "[ignored, not to be hipified]"
        hipify_result.current_state = CurrentState.DONE
        return hipify_result

    rel_filepath = os.path.relpath(filepath, output_directory)

    with open(fin_path, encoding='utf-8') as fin:
        # A breadcrumb first line means this file is itself hipify output.
        if fin.readline() == HIPIFY_C_BREADCRUMB:
            hipify_result.hipified_path = None
            hipify_result.status = "[ignored, input is hipified output]"
            hipify_result.current_state = CurrentState.DONE
            return hipify_result
        fin.seek(0)
        output_source = fin.read()

    # Kept to detect "no changes" for extensions near the end.
    orig_output_source = output_source

    fout_path = os.path.abspath(os.path.join(output_directory, get_hip_file_path(rel_filepath, is_pytorch_extension)))
    if not os.path.exists(os.path.dirname(fout_path)):
        clean_ctx.makedirs(os.path.dirname(fout_path))

    # Identifier substitution callbacks for the trie-built regexes.
    def pt_repl(m):
        return PYTORCH_MAP[m.group(0)]

    def pt_special_repl(m):
        # Check the SPECIAL overrides first; fall back to the generic
        # pytorch mapping on a miss.
        return PYTORCH_SPECIAL_MAP.get(m.group(0), pt_repl(m))

    # Pick the substitution table based on what kind of file this is.
    if is_pytorch_extension:
        output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source)
    else:
        if is_special_file(rel_filepath):
            output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_special_repl, output_source)
        elif is_pytorch_file(rel_filepath):
            output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source)
        else:
            def c2_repl(m):
                return CAFFE2_MAP[m.group(0)]
            output_source = RE_CAFFE2_PREPROCESSOR.sub(c2_repl, output_source)

    # Build a replacer for #include / THC_GENERIC_FILE directives; `templ`
    # supplies the surrounding directive text with "{0}" for the path.
    def mk_repl(templ, include_current_dir=True):
        def repl(m):
            f = m.group(1)
            dirpath, filename = os.path.split(f)
            # Known CUDA header locations are rewritten unconditionally.
            if (
                f.startswith(("ATen/cuda",
                              "ATen/native/cuda",
                              "ATen/native/nested/cuda",
                              "ATen/native/quantized/cuda",
                              "ATen/native/sparse/cuda",
                              "ATen/native/transformers/cuda",
                              "THC/")) or
                (f.startswith("THC") and not f.startswith("THCP"))
            ):
                return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))

            # For extensions: resolve the header on disk and hipify it too.
            if (is_pytorch_extension and any(s.endswith(filename) for s in all_files)):
                header_dir = None
                header_filepath = None

                # First try the including file's own directory (quote form).
                if include_current_dir:
                    header_dir_to_check = os.path.dirname(fin_path)
                    header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f))
                    if os.path.exists(header_path_to_check):
                        header_dir = header_dir_to_check
                        header_filepath = header_path_to_check

                # Then search the configured include directories.
                if header_filepath is None:
                    for header_include_dir in header_include_dirs:
                        header_dir_to_check = os.path.join(output_directory, header_include_dir)
                        header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f))
                        if os.path.exists(header_path_to_check):
                            header_dir = header_dir_to_check
                            header_filepath = header_path_to_check

                # Unresolvable: leave the directive untouched.
                if header_filepath is None:
                    return m.group(0)

                # Recursively hipify the header if it has not been seen yet.
                if header_filepath not in HIPIFY_FINAL_RESULT:
                    preprocess_file_and_save_result(output_directory,
                                                    header_filepath,
                                                    all_files, header_include_dirs, stats, hip_clang_launch,
                                                    is_pytorch_extension, clean_ctx, show_progress)
                elif header_filepath in HIPIFY_FINAL_RESULT:
                    header_result = HIPIFY_FINAL_RESULT[header_filepath]
                    # Still INITIALIZED means the header is being processed
                    # higher up the call stack (include cycle): compute its
                    # output path directly instead of waiting.
                    if header_result.current_state == CurrentState.INITIALIZED:
                        header_rel_path = os.path.relpath(header_filepath, output_directory)
                        header_fout_path = os.path.abspath(os.path.join(output_directory,
                                                                        get_hip_file_path(header_rel_path, is_pytorch_extension)))
                        header_result.hipified_path = header_fout_path
                        HIPIFY_FINAL_RESULT[header_filepath] = header_result
                        return templ.format(os.path.relpath(header_fout_path if header_fout_path is not None
                                                            else header_filepath, header_dir))
                hipified_header_filepath = HIPIFY_FINAL_RESULT[header_filepath].hipified_path
                return templ.format(os.path.relpath(hipified_header_filepath if hipified_header_filepath is not None
                                                    else header_filepath, header_dir))

            return m.group(0)
        return repl
    output_source = RE_QUOTE_HEADER.sub(mk_repl('#include "{0}"', True), output_source)
    output_source = RE_ANGLE_HEADER.sub(mk_repl('#include <{0}>', False), output_source)
    output_source = RE_THC_GENERIC_FILE.sub(mk_repl('#define THC_GENERIC_FILE "{0}"'), output_source)

    # CMakeLists.txt rewriting.
    if filepath.endswith('CMakeLists.txt'):
        output_source = output_source.replace('CUDA', 'HIP')
        output_source = output_source.replace('THC', 'THH')
        output_source = RE_CU_SUFFIX.sub('.hip', output_source)

    # Perform kernel-launch (<<<...>>>) replacements.
    if not hip_clang_launch:
        output_source = processKernelLaunches(output_source, stats)

    # Replace std:: math with non-std:: versions (PowKernel excluded).
    if (filepath.endswith((".cu", ".cuh"))) and "PowKernel" not in filepath:
        output_source = replace_math_functions(output_source)

    # Include the HIP runtime header if device code is contained.
    output_source = hip_header_magic(output_source)

    # Extensions: if hipification changed nothing and the output would live
    # next to the input, skip producing a redundant copy.
    if (
        is_pytorch_extension
        and orig_output_source == output_source
        and os.path.dirname(fin_path) == os.path.dirname(fout_path)
    ):
        hipify_result.hipified_path = fin_path
        hipify_result.status = "[skipped, no changes]"
        hipify_result.current_state = CurrentState.DONE
        return hipify_result

    # Add the breadcrumb to C-family outputs so they are not re-hipified.
    if fin_path != fout_path and match_extensions(fin_path, (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".hpp")):
        output_source = HIPIFY_C_BREADCRUMB + output_source

    # Only rewrite the output file when its content actually differs.
    do_write = True
    if os.path.exists(fout_path):
        with open(fout_path, encoding='utf-8') as fout_old:
            do_write = fout_old.read() != output_source
    if do_write:
        try:
            with clean_ctx.open(fout_path, 'w', encoding='utf-8') as fout:
                fout.write(output_source)
            hipify_result.hipified_path = fout_path
            hipify_result.status = "[ok]"
            hipify_result.current_state = CurrentState.DONE
            return hipify_result
        except PermissionError as e:
            print(f'{bcolors.WARNING}Failed to save {fout_path} with "{e.strerror}", leaving {fin_path} unchanged.{bcolors.ENDC}',
                  file=sys.stderr)
            hipify_result.hipified_path = fin_path
            hipify_result.status = "[skipped, no permissions]"
            hipify_result.current_state = CurrentState.DONE
            return hipify_result
    else:
        hipify_result.hipified_path = fout_path
        hipify_result.status = "[skipped, already hipified]"
        hipify_result.current_state = CurrentState.DONE
        return hipify_result
|
|
|
def file_specific_replacement(filepath, search_string, replace_string, strict=False):
    """Rewrite the file at *filepath* in place, replacing occurrences of
    *search_string* with *replace_string*.

    With strict=True only whole-word (word-boundary delimited) occurrences
    are replaced.
    """
    with openf(filepath, "r+") as f:
        text = f.read()
        if strict:
            word_pattern = fr'\b({re.escape(search_string)})\b'
            text = re.sub(word_pattern, lambda _match: replace_string, text)
        else:
            text = text.replace(search_string, replace_string)
        f.seek(0)
        f.write(text)
        f.truncate()
|
|
|
|
|
def file_add_header(filepath, header):
    """Prepend an #include for *header* to the file at *filepath*, in place.

    The header is wrapped in double quotes unless it is written in
    angle-bracket form (note: the original condition only skips quoting when
    the first char is '<' OR the last is '>').
    """
    with openf(filepath, "r+") as f:
        body = f.read()
        needs_quotes = header[0] != "<" and header[-1] != ">"
        if needs_quotes:
            header = f'"{header}"'
        f.seek(0)
        f.write(f'#include {header} \n' + body)
        f.truncate()
|
|
|
|
|
def fix_static_global_kernels(in_txt):
    """Static global kernels in HIP results in a compilation error.

    Drop the 'static' qualifier from ' __global__ static' declarations
    (the leading space is consumed by the replacement).
    """
    return in_txt.replace(" __global__ static", "__global__")
|
|
|
|
|
# Matches a whole "#include ..." line including its newline.
# NOTE(review): not referenced in the visible code of this module.
RE_INCLUDE = re.compile(r"#include .*\n")
|
|
|
|
|
def extract_arguments(start, string):
    """ Return the list of arguments in the upcoming function parameter closure.
        Example:
        string (input): '(blocks, threads, 0, THCState_getCurrentStream(state))'
        arguments (output):
            '[{'start': 1, 'end': 7},
            {'start': 8, 'end': 16},
            {'start': 17, 'end': 19},
            {'start': 20, 'end': 53}]'
    """
    arguments = []
    paren_depth = 0   # "(" nesting depth
    angle_depth = 0   # "<" nesting depth (templates)
    pos = start
    arg_start = pos + 1  # first argument begins right after the opening "("

    while pos < len(string):
        ch = string[pos]
        if ch == "(":
            paren_depth += 1
        elif ch == ")":
            paren_depth -= 1
        elif ch == "<":
            angle_depth += 1
        elif ch == ">" and string[pos - 1] != "-" and angle_depth > 0:
            # "->" is an arrow operator, not a closing angle bracket.
            angle_depth -= 1

        # All closures balanced again: the final argument ends here.
        if paren_depth == 0 and angle_depth == 0:
            arguments.append({"start": arg_start, "end": pos})
            break

        # A comma at the top level of the parameter list separates arguments.
        if paren_depth == 1 and angle_depth == 0 and ch == ",":
            arguments.append({"start": arg_start, "end": pos})
            arg_start = pos + 1

        pos += 1

    return arguments
|
|
|
|
|
def str2bool(v):
    """ArgumentParser doesn't support type=bool. Thus, this helper method will convert
    from possible string types to True / False."""
    normalized = v.lower()
    if normalized in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalized in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
|
|
|
|
def hipify(
        project_directory: str,
        show_detailed: bool = False,
        extensions: Iterable = (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"),
        header_extensions: Iterable = (".cuh", ".h", ".hpp"),
        output_directory: str = "",
        header_include_dirs: Iterable = (),
        includes: Iterable = ('*',),
        extra_files: Iterable = (),
        out_of_place_only: bool = False,
        ignores: Iterable = (),
        show_progress: bool = True,
        hip_clang_launch: bool = False,
        is_pytorch_extension: bool = False,
        hipify_extra_files_only: bool = False,
        clean_ctx: Optional[GeneratedFileCleaner] = None
) -> HipifyFinalResult:
    """Top-level driver: copy *project_directory* to *output_directory* (if
    needed), collect matching files, preprocess each one, and return the
    global HIPIFY_FINAL_RESULT table mapping input paths to HipifyResult.
    """
    if project_directory == "":
        project_directory = os.getcwd()

    # Verify the project directory exists.
    if not os.path.exists(project_directory):
        print("The project folder specified does not exist.")
        sys.exit(1)

    # Default the output directory to "<project>_amd".
    if not output_directory:
        # BUGFIX: str.rstrip returns a new string; the original call discarded
        # its result, so a trailing-slash input produced "<project>/_amd".
        project_directory = project_directory.rstrip("/")
        output_directory = project_directory + "_amd"

    # Rebase include/ignore patterns onto the output directory so matching
    # happens against the copied tree.
    if project_directory != output_directory:
        includes = [include.replace(project_directory, output_directory) for include in includes]
        ignores = [ignore.replace(project_directory, output_directory) for ignore in ignores]

    # Copy the source tree into the output directory if not done already.
    if not os.path.exists(output_directory):
        shutil.copytree(project_directory, output_directory)

    all_files = list(matched_files_iter(output_directory, includes=includes,
                                        ignores=ignores, extensions=extensions,
                                        out_of_place_only=out_of_place_only,
                                        is_pytorch_extension=is_pytorch_extension))
    # Add explicitly requested extra files (deduplicated against the scan).
    all_files_set = set(all_files)
    for f in extra_files:
        if not os.path.isabs(f):
            f = os.path.join(output_directory, f)
        if f not in all_files_set:
            all_files.append(f)

    # Pull in headers from the configured include directories as well.
    from pathlib import Path
    for header_include_dir in header_include_dirs:
        if os.path.isabs(header_include_dir):
            header_include_dir_path = Path(header_include_dir)
        else:
            header_include_dir_path = Path(os.path.join(output_directory, header_include_dir))
        for path in header_include_dir_path.rglob('*'):
            if (
                path.is_file()
                and _fnmatch(str(path), includes)
                and (not _fnmatch(str(path), ignores))
                and match_extensions(path.name, header_extensions)
            ):
                all_files.append(str(path))

    if clean_ctx is None:
        clean_ctx = GeneratedFileCleaner(keep_intermediates=True)

    # Preprocessing statistics (filled in by processKernelLaunches et al.).
    stats: Dict[str, List] = {"unsupported_calls": [], "kernel_launches": []}

    for filepath in (all_files if not hipify_extra_files_only else extra_files):
        preprocess_file_and_save_result(output_directory, filepath, all_files, header_include_dirs,
                                        stats, hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress)

    print(bcolors.OKGREEN + "Successfully preprocessed all matching files." + bcolors.ENDC, file=sys.stderr)

    # Show detailed summary when requested.
    if show_detailed:
        compute_stats(stats)

    return HIPIFY_FINAL_RESULT
|
|