|
"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware |
|
optimization, starting from parsing the command arguments, to managing the |
|
relation between the CPU baseline and dispatch-able features, |
|
also generating the required C headers and ending with compiling |
|
the sources with proper compiler's flags. |
|
|
|
`CCompilerOpt` doesn't provide runtime detection for the CPU features, |
|
instead only focuses on the compiler side, but it creates abstract C headers |
|
that can be used later for the final runtime dispatching process.""" |
|
|
|
import atexit |
|
import inspect |
|
import os |
|
import pprint |
|
import re |
|
import subprocess |
|
import textwrap |
|
|
|
class _Config:
    """An abstract class that holds all configurable attributes of `CCompilerOpt`.

    These class attributes can be used to change the default behavior
    of `CCompilerOpt` in order to fit other requirements.

    Attributes
    ----------
    conf_nocache : bool
        Set True to disable memory and file cache.
        Default is False.

    conf_noopt : bool
        Set True to force the optimization to be disabled,
        in this case `CCompilerOpt` tends to generate all
        expected headers in order to 'not' break the build.
        Default is False.

    conf_cache_factors : list
        Add extra factors to the primary caching factors. The caching factors
        are utilized to determine whether something changed that requires
        discarding the cache and rebuilding it. The primary factors
        are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc).
        Default is a list of two items, containing the time of last modification
        of `ccompiler_opt` and the value of attribute "conf_noopt".

    conf_tmp_path : str
        The path of temporary directory. Default is auto-created
        temporary directory via ``tempfile.mkdtemp()``.

    conf_check_path : str
        The path of testing files. Each added CPU feature must have a
        **C** source file that contains at least one intrinsic or instruction
        related to this feature, so it can be tested against the compiler.
        Default is the ``checks`` directory located next to this module.

    conf_target_groups : dict
        Extra tokens that can be reached from dispatch-able sources through
        the special mark ``@targets``. Default is an empty dictionary.

        **Notes**:
            - case-insensitive for tokens and group names
            - sign '#' must stick in the begin of group name and only within ``@targets``

        **Example**:
            .. code-block:: console

                $ "@targets #avx_group other_tokens" > group_inside.c

            >>> CCompilerOpt.conf_target_groups["avx_group"] = \\
            "$werror $maxopt avx2 avx512f avx512_skx"
            >>> cco = CCompilerOpt(cc_instance)
            >>> cco.try_dispatch(["group_inside.c"])

    conf_c_prefix : str
        The prefix of public C definitions. Default is ``"NPY_"``.

    conf_c_prefix_ : str
        The prefix of internal C definitions. Default is ``"NPY__"``.

    conf_cc_flags : dict
        Nested dictionaries defining several compiler flags
        that are linked to some major functions, the main key
        represents the compiler name and sub-keys represent
        flag names. The default already covers all supported
        **C** compilers.

        Sub-keys explained as follows:

        "native": str or None
            used by argument option `native`, to detect the current
            machine support via the compiler.
        "werror": str or None
            utilized to treat warnings as errors during testing CPU features
            against the compiler and also for target's policy `$werror`
            via dispatch-able sources.
        "opt": str or None
            utilized for target's policy '$maxopt' and the value should
            contain the maximum acceptable optimization by the compiler.
            e.g. in gcc ``'-O3'``

        **Notes**:
            * case-sensitive for compiler names and flags
            * use space to separate multiple flags
            * any flag will be tested against the compiler and it will be
              skipped if it's not applicable.

    conf_min_features : dict
        A dictionary that defines the used CPU features for
        argument option ``'min'``, the key represents the CPU architecture
        name e.g. ``'x86'``. Default values provide the best effort
        on a wide range of users' platforms.

        **Note**: case-sensitive for architecture names.

    conf_features : dict
        Nested dictionaries used for identifying the CPU features.
        The primary key is represented as a feature name or group name
        that gathers several features. Default values cover all
        supported features but without the major options like "flags",
        these undefined options are handled by method `conf_features_partial()`.
        The default value covers almost all CPU features for *X86*, *IBM/Power64*
        and *ARM 7/8*.

        Sub-keys explained as follows:

        "implies" : str or list, optional,
            List of CPU feature names to be implied by it,
            the feature name must be defined within `conf_features`.
            Default is None.

        "flags": str or list, optional
            List of compiler flags. Default is None.

        "detect": str or list, optional
            List of CPU feature names that are required to be detected
            at runtime. By default, it's the feature name or the features
            in "group" if it's specified.

        "implies_detect": bool, optional
            If True, all "detect" of implied features will be combined.
            Default is True. see `feature_detect()`.

        "group": str or list, optional
            Same as "implies" but doesn't require the feature name to be
            defined within `conf_features`.

        "interest": int, required
            a key for sorting CPU features

        "headers": str or list, optional
            intrinsics C header file

        "disable": str, optional
            force disable feature, the string value should contain the
            reason of disabling.

        "autovec": bool or None, optional
            True or False to declare that CPU feature can be auto-vectorized
            by the compiler.
            By default(None), treated as True if the feature contains at
            least one applicable flag. see `feature_can_autovec()`

        "extra_checks": str or list, optional
            Extra test case names for the CPU feature that need to be tested
            against the compiler.

            Each test case must have a C file named ``extra_xxxx.c``, where
            ``xxxx`` is the case name in lower case, under 'conf_check_path'.
            It should contain at least one intrinsic or function related to the test case.

            If the compiler is able to successfully compile the C file then `CCompilerOpt`
            will add a C ``#define`` for it into the main dispatch header, e.g.
            ``#define {conf_c_prefix}_XXXX`` where ``XXXX`` is the case name in upper case.

        **NOTES**:
            * space can be used as separator with options that support "str or list"
            * case-sensitive for all values and feature name must be in upper-case.
            * if flags aren't applicable, they will be skipped rather than disabling
              the CPU feature
            * the CPU feature will be disabled if the compiler fails to compile
              the test file
    """
    conf_nocache = False
    conf_noopt = False
    # None means "compute the defaults in __init__".
    conf_cache_factors = None
    # None means "create a temporary directory in __init__".
    conf_tmp_path = None
    conf_check_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "checks"
    )
    conf_target_groups = {}
    conf_c_prefix = 'NPY_'
    conf_c_prefix_ = 'NPY__'
    # Per-compiler flags; a value of None means the compiler has no such flag.
    conf_cc_flags = dict(
        gcc = dict(
            native = '-march=native',
            opt = '-O3',
            werror = '-Werror',
        ),
        clang = dict(
            native = '-march=native',
            opt = "-O3",
            # Two space-separated flags; each one is tested on its own
            # against the compiler before being used.
            werror = '-Werror=switch -Werror',
        ),
        icc = dict(
            native = '-xHost',
            opt = '-O3',
            werror = '-Werror',
        ),
        iccw = dict(
            native = '/QxHost',
            opt = '/O3',
            werror = '/Werror',
        ),
        msvc = dict(
            native = None,
            opt = '/O2',
            werror = '/WX',
        ),
        fcc = dict(
            native = '-mcpu=a64fx',
            opt = None,
            werror = None,
        )
    )
    # Features selected by the argument option 'min', keyed by architecture.
    conf_min_features = dict(
        x86 = "SSE SSE2",
        x64 = "SSE SSE2 SSE3",
        ppc64 = '',
        ppc64le = "VSX VSX2",
        s390x = '',
        armhf = '',
        aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
    )
    conf_features = dict(
        # ---- X86 ----
        SSE = dict(
            interest=1, headers="xmmintrin.h",
            # SSE and SSE2 imply each other, so they are always enabled together.
            implies="SSE2"
        ),
        SSE2 = dict(interest=2, implies="SSE", headers="emmintrin.h"),
        SSE3 = dict(interest=3, implies="SSE2", headers="pmmintrin.h"),
        SSSE3 = dict(interest=4, implies="SSE3", headers="tmmintrin.h"),
        SSE41 = dict(interest=5, implies="SSSE3", headers="smmintrin.h"),
        POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"),
        SSE42 = dict(interest=7, implies="POPCNT"),
        AVX = dict(
            interest=8, implies="SSE42", headers="immintrin.h",
            implies_detect=False
        ),
        XOP = dict(interest=9, implies="AVX", headers="x86intrin.h"),
        FMA4 = dict(interest=10, implies="AVX", headers="x86intrin.h"),
        F16C = dict(interest=11, implies="AVX"),
        FMA3 = dict(interest=12, implies="F16C"),
        AVX2 = dict(interest=13, implies="F16C"),
        AVX512F = dict(
            interest=20, implies="FMA3 AVX2", implies_detect=False,
            extra_checks="AVX512F_REDUCE"
        ),
        AVX512CD = dict(interest=21, implies="AVX512F"),
        AVX512_KNL = dict(
            interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
            detect="AVX512_KNL", implies_detect=False
        ),
        AVX512_KNM = dict(
            interest=41, implies="AVX512_KNL",
            group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ",
            detect="AVX512_KNM", implies_detect=False
        ),
        AVX512_SKX = dict(
            interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
            detect="AVX512_SKX", implies_detect=False,
            extra_checks="AVX512BW_MASK AVX512DQ_MASK"
        ),
        AVX512_CLX = dict(
            interest=43, implies="AVX512_SKX", group="AVX512VNNI",
            detect="AVX512_CLX"
        ),
        AVX512_CNL = dict(
            interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI",
            detect="AVX512_CNL", implies_detect=False
        ),
        AVX512_ICL = dict(
            interest=45, implies="AVX512_CLX AVX512_CNL",
            group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ",
            detect="AVX512_ICL", implies_detect=False
        ),
        AVX512_SPR = dict(
            interest=46, implies="AVX512_ICL", group="AVX512FP16",
            detect="AVX512_SPR", implies_detect=False
        ),

        # ---- IBM/Power ----
        VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"),
        VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
        VSX3 = dict(interest=3, implies="VSX2", implies_detect=False,
                    extra_checks="VSX3_HALF_DOUBLE"),
        VSX4 = dict(interest=4, implies="VSX3", implies_detect=False,
                    extra_checks="VSX4_MMA"),

        # ---- IBM/Z (s390x) ----
        VX = dict(interest=1, headers="vecintrin.h"),
        VXE = dict(interest=2, implies="VX", implies_detect=False),
        VXE2 = dict(interest=3, implies="VXE", implies_detect=False),

        # ---- ARM ----
        NEON = dict(interest=1, headers="arm_neon.h"),
        NEON_FP16 = dict(interest=2, implies="NEON"),
        NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"),
        ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False),
        ASIMDHP = dict(interest=5, implies="ASIMD"),
        ASIMDDP = dict(interest=6, implies="ASIMD"),
        ASIMDFHM = dict(interest=7, implies="ASIMDHP"),
    )

    def conf_features_partial(self):
        """Return a dictionary of supported CPU features by the platform,
        and accumulate the rest of undefined options in `conf_features`,
        the returned dict has same rules and notes in
        class attribute `conf_features`, also it overrides
        any options that have been set in 'conf_features'.

        The selection is driven by the ``cc_on_*`` / ``cc_is_*`` / ``cc_noopt``
        attributes of the instance; an empty dict is returned when the
        optimization is disabled or no architecture/compiler pair matches.
        """
        if self.cc_noopt:
            # Optimization is disabled, there is nothing to complete.
            return {}

        on_x86 = self.cc_on_x86 or self.cc_on_x64
        is_unix = self.cc_is_gcc or self.cc_is_clang or self.cc_is_fcc

        if on_x86 and is_unix:
            return dict(
                SSE = dict(flags="-msse"),
                SSE2 = dict(flags="-msse2"),
                SSE3 = dict(flags="-msse3"),
                SSSE3 = dict(flags="-mssse3"),
                SSE41 = dict(flags="-msse4.1"),
                POPCNT = dict(flags="-mpopcnt"),
                SSE42 = dict(flags="-msse4.2"),
                AVX = dict(flags="-mavx"),
                F16C = dict(flags="-mf16c"),
                XOP = dict(flags="-mxop"),
                FMA4 = dict(flags="-mfma4"),
                FMA3 = dict(flags="-mfma"),
                AVX2 = dict(flags="-mavx2"),
                AVX512F = dict(flags="-mavx512f -mno-mmx"),
                AVX512CD = dict(flags="-mavx512cd"),
                AVX512_KNL = dict(flags="-mavx512er -mavx512pf"),
                AVX512_KNM = dict(
                    flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq"
                ),
                AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"),
                AVX512_CLX = dict(flags="-mavx512vnni"),
                AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"),
                AVX512_ICL = dict(
                    flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq"
                ),
                AVX512_SPR = dict(flags="-mavx512fp16"),
            )

        if on_x86 and self.cc_is_icc:
            return dict(
                SSE = dict(flags="-msse"),
                SSE2 = dict(flags="-msse2"),
                SSE3 = dict(flags="-msse3"),
                SSSE3 = dict(flags="-mssse3"),
                SSE41 = dict(flags="-msse4.1"),
                POPCNT = {},
                SSE42 = dict(flags="-msse4.2"),
                AVX = dict(flags="-mavx"),
                F16C = {},
                XOP = dict(disable="Intel Compiler doesn't support it"),
                FMA4 = dict(disable="Intel Compiler doesn't support it"),
                # FMA3 and AVX2 share one flag, hence they imply each other.
                FMA3 = dict(
                    implies="F16C AVX2", flags="-march=core-avx2"
                ),
                AVX2 = dict(implies="FMA3", flags="-march=core-avx2"),
                # AVX512F and AVX512CD share one flag, hence they imply
                # each other.
                AVX512F = dict(
                    implies="AVX2 AVX512CD", flags="-march=common-avx512"
                ),
                AVX512CD = dict(
                    implies="AVX2 AVX512F", flags="-march=common-avx512"
                ),
                AVX512_KNL = dict(flags="-xKNL"),
                AVX512_KNM = dict(flags="-xKNM"),
                AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"),
                AVX512_CLX = dict(flags="-xCASCADELAKE"),
                AVX512_CNL = dict(flags="-xCANNONLAKE"),
                AVX512_ICL = dict(flags="-xICELAKE-CLIENT"),
                AVX512_SPR = dict(disable="Not supported yet")
            )

        if on_x86 and self.cc_is_iccw:
            return dict(
                SSE = dict(flags="/arch:SSE"),
                SSE2 = dict(flags="/arch:SSE2"),
                SSE3 = dict(flags="/arch:SSE3"),
                SSSE3 = dict(flags="/arch:SSSE3"),
                SSE41 = dict(flags="/arch:SSE4.1"),
                POPCNT = {},
                SSE42 = dict(flags="/arch:SSE4.2"),
                AVX = dict(flags="/arch:AVX"),
                F16C = {},
                XOP = dict(disable="Intel Compiler doesn't support it"),
                FMA4 = dict(disable="Intel Compiler doesn't support it"),
                # FMA3 and AVX2 share one flag, hence they imply each other.
                FMA3 = dict(
                    implies="F16C AVX2", flags="/arch:CORE-AVX2"
                ),
                AVX2 = dict(
                    implies="FMA3", flags="/arch:CORE-AVX2"
                ),
                # AVX512F and AVX512CD share one flag, hence they imply
                # each other.
                AVX512F = dict(
                    implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512"
                ),
                AVX512CD = dict(
                    implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512"
                ),
                AVX512_KNL = dict(flags="/Qx:KNL"),
                AVX512_KNM = dict(flags="/Qx:KNM"),
                AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"),
                AVX512_CLX = dict(flags="/Qx:CASCADELAKE"),
                AVX512_CNL = dict(flags="/Qx:CANNONLAKE"),
                AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT"),
                AVX512_SPR = dict(disable="Not supported yet")
            )

        if on_x86 and self.cc_is_msvc:
            return dict(
                # The SSE/SSE2 flags are only passed on 32-bit x86.
                SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {},
                SSE2 = dict(flags="/arch:SSE2") if self.cc_on_x86 else {},
                SSE3 = {},
                SSSE3 = {},
                SSE41 = {},
                POPCNT = dict(headers="nmmintrin.h"),
                SSE42 = {},
                AVX = dict(flags="/arch:AVX"),
                F16C = {},
                XOP = dict(headers="ammintrin.h"),
                FMA4 = dict(headers="ammintrin.h"),
                # FMA3 and AVX2 share one flag, hence they imply each other.
                FMA3 = dict(
                    implies="F16C AVX2", flags="/arch:AVX2"
                ),
                AVX2 = dict(
                    implies="F16C FMA3", flags="/arch:AVX2"
                ),
                # AVX512F, AVX512CD and AVX512_SKX all share a single flag,
                # hence the first two also imply AVX512_SKX.
                AVX512F = dict(
                    implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512"
                ),
                AVX512CD = dict(
                    implies="AVX512F AVX512_SKX", flags="/arch:AVX512"
                ),
                AVX512_KNL = dict(
                    disable="MSVC compiler doesn't support it"
                ),
                AVX512_KNM = dict(
                    disable="MSVC compiler doesn't support it"
                ),
                AVX512_SKX = dict(flags="/arch:AVX512"),
                AVX512_CLX = {},
                AVX512_CNL = {},
                AVX512_ICL = {},
                AVX512_SPR= dict(
                    disable="MSVC compiler doesn't support it"
                )
            )

        on_power = self.cc_on_ppc64le or self.cc_on_ppc64
        if on_power:
            partial = dict(
                VSX = dict(
                    # On little-endian the baseline also pulls in VSX2.
                    implies=("VSX2" if self.cc_on_ppc64le else ""),
                    flags="-mvsx"
                ),
                VSX2 = dict(
                    flags="-mcpu=power8", implies_detect=False
                ),
                VSX3 = dict(
                    flags="-mcpu=power9 -mtune=power9", implies_detect=False
                ),
                VSX4 = dict(
                    flags="-mcpu=power10 -mtune=power10", implies_detect=False
                )
            )
            if self.cc_is_clang:
                # Clang gets its own flag set (no -mtune, explicit -maltivec).
                partial["VSX"]["flags"] = "-maltivec -mvsx"
                partial["VSX2"]["flags"] = "-mcpu=power8"
                partial["VSX3"]["flags"] = "-mcpu=power9"
                partial["VSX4"]["flags"] = "-mcpu=power10"

            return partial

        on_zarch = self.cc_on_s390x
        if on_zarch:
            partial = dict(
                VX = dict(
                    flags="-march=arch11 -mzvector"
                ),
                VXE = dict(
                    flags="-march=arch12", implies_detect=False
                ),
                VXE2 = dict(
                    flags="-march=arch13", implies_detect=False
                )
            )

            return partial

        if self.cc_on_aarch64 and is_unix:
            return dict(
                # NEON, NEON_FP16, NEON_VFPV4 and ASIMD all imply each other
                # here and need no flags; they are declared auto-vectorizable.
                NEON = dict(
                    implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
                ),
                NEON_FP16 = dict(
                    implies="NEON NEON_VFPV4 ASIMD", autovec=True
                ),
                NEON_VFPV4 = dict(
                    implies="NEON NEON_FP16 ASIMD", autovec=True
                ),
                ASIMD = dict(
                    implies="NEON NEON_FP16 NEON_VFPV4", autovec=True
                ),
                ASIMDHP = dict(
                    flags="-march=armv8.2-a+fp16"
                ),
                ASIMDDP = dict(
                    flags="-march=armv8.2-a+dotprod"
                ),
                ASIMDFHM = dict(
                    flags="-march=armv8.2-a+fp16fml"
                ),
            )

        if self.cc_on_armhf and is_unix:
            return dict(
                NEON = dict(
                    flags="-mfpu=neon"
                ),
                NEON_FP16 = dict(
                    flags="-mfpu=neon-fp16 -mfp16-format=ieee"
                ),
                NEON_VFPV4 = dict(
                    flags="-mfpu=neon-vfpv4",
                ),
                ASIMD = dict(
                    flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd",
                ),
                ASIMDHP = dict(
                    flags="-march=armv8.2-a+fp16"
                ),
                ASIMDDP = dict(
                    flags="-march=armv8.2-a+dotprod",
                ),
                ASIMDFHM = dict(
                    flags="-march=armv8.2-a+fp16fml"
                )
            )

        # Unknown architecture/compiler combination.
        return {}

    def __init__(self):
        """Fill in the attributes whose defaults can only be computed at runtime.

        * ``conf_tmp_path``: when left as None, a temporary directory is
          created and scheduled for (best-effort) removal at interpreter exit.
        * ``conf_cache_factors``: when left as None, it becomes
          ``[mtime of this module, conf_noopt]``.
        """
        if self.conf_tmp_path is None:
            import shutil
            import tempfile
            tmp = tempfile.mkdtemp()

            def rm_temp():
                # Best effort only: the directory may already be gone.
                try:
                    shutil.rmtree(tmp)
                except OSError:
                    pass

            atexit.register(rm_temp)
            self.conf_tmp_path = tmp

        if self.conf_cache_factors is None:
            # `conf_noopt` (as documented) must take part in the cache hash:
            # toggling it changes the cached `cc_noopt` state. `conf_nocache`
            # is pointless here because the cache is skipped entirely whenever
            # it is True.
            self.conf_cache_factors = [
                os.path.getmtime(__file__),
                self.conf_noopt
            ]
|
|
|
class _Distutils:
    """A helper class that provides a collection of fundamental methods
    implemented on top of Python and NumPy Distutils.

    The idea behind this class is to gather all methods that may need
    to be overridden when 'CCompilerOpt' is reused in an environment
    different from what NumPy has.

    Parameters
    ----------
    ccompiler : `CCompiler`
        The instance returned from `distutils.ccompiler.new_compiler()`.
    """
    def __init__(self, ccompiler):
        self._ccompiler = ccompiler

    def dist_compile(self, sources, flags, ccompiler=None, **kwargs):
        """Wrap CCompiler.compile()

        `flags` are appended to any ``extra_postargs`` found in `kwargs`,
        the remaining keyword arguments are forwarded untouched. When
        `ccompiler` is not given, the instance passed to the constructor
        is used. Returns whatever ``compile()`` returns.
        """
        assert(isinstance(sources, list))
        assert(isinstance(flags, list))
        flags = kwargs.pop("extra_postargs", []) + flags
        if not ccompiler:
            ccompiler = self._ccompiler

        return ccompiler.compile(sources, extra_postargs=flags, **kwargs)

    def dist_test(self, source, flags, macros=None):
        """Return True if 'CCompiler.compile()' is able to compile
        a source file with certain flags.

        While testing, the compiler's ``spawn`` (if any) is temporarily
        replaced by one of the `_dist_test_spawn*` helpers so the compiler
        output can be inspected; the original is always restored afterwards,
        even when an unexpected exception escapes.
        """
        assert(isinstance(source, str))
        from distutils.errors import CompileError
        if macros is None:
            macros = []
        cc = self._ccompiler
        bk_spawn = getattr(cc, 'spawn', None)
        if bk_spawn:
            cc_type = getattr(cc, "compiler_type", "")
            if cc_type in ("msvc",):
                setattr(cc, 'spawn', self._dist_test_spawn_paths)
            else:
                setattr(cc, 'spawn', self._dist_test_spawn)
        try:
            self.dist_compile(
                [source], flags, macros=macros, output_dir=self.conf_tmp_path
            )
        except CompileError as e:
            self.dist_log(str(e), stderr=True)
            return False
        finally:
            # never leave the compiler instance monkey-patched
            if bk_spawn:
                setattr(cc, 'spawn', bk_spawn)
        return True

    def dist_info(self):
        """
        Return a tuple containing info about (platform, compiler, extra_args),
        required by the abstract class '_CCompiler' for discovering the
        platform environment. This is also used as a cache factor in order
        to detect any changes happening from outside.

        The result is computed once and memoized in ``self._dist_info``.
        """
        if hasattr(self, "_dist_info"):
            return self._dist_info

        cc = self._ccompiler
        cc_type = getattr(cc, "compiler_type", '')
        if cc_type in ("intelem", "intelemw"):
            platform = "x86_64"
        elif cc_type in ("intel", "intelw", "intele"):
            platform = "x86"
        else:
            from distutils.util import get_platform
            platform = get_platform()

        cc_info = getattr(cc, "compiler", getattr(cc, "compiler_so", ''))
        # A plain string is iterable as well, but it names a single command;
        # indexing it would yield characters instead of argv items.
        if not isinstance(cc_info, str) and hasattr(cc_info, "__iter__"):
            cc_argv = list(cc_info)
        else:
            cc_argv = None

        if not cc_type or cc_type == "unix":
            if cc_argv is None:
                compiler = str(cc_info)
            else:
                # an empty command line simply means "unknown compiler"
                compiler = cc_argv[0] if cc_argv else ''
        else:
            compiler = cc_type

        if cc_argv is not None and len(cc_argv) > 1:
            extra_args = ' '.join(cc_argv[1:])
        else:
            # keep the two variables separated, otherwise the last flag of
            # CFLAGS gets glued to the first flag of CPPFLAGS
            env_flags = (
                os.environ.get("CFLAGS", ""), os.environ.get("CPPFLAGS", "")
            )
            extra_args = ' '.join(f for f in env_flags if f)

        self._dist_info = (platform, compiler, extra_args)
        return self._dist_info

    @staticmethod
    def dist_error(*args):
        """Raise a compiler error"""
        from distutils.errors import CompileError
        raise CompileError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_fatal(*args):
        """Raise a distutils error"""
        from distutils.errors import DistutilsError
        raise DistutilsError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_log(*args, stderr=False):
        """Print a console message.

        Uses ``log.warn`` when `stderr` is True and ``log.info`` otherwise.
        """
        from numpy.distutils import log
        out = _Distutils._dist_str(*args)
        if stderr:
            log.warn(out)
        else:
            log.info(out)

    @staticmethod
    def dist_load_module(name, path):
        """Load a module from file, required by the abstract class '_Cache'.

        Returns None (after logging the exception) when loading fails.
        """
        from .misc_util import exec_mod_from_location
        try:
            return exec_mod_from_location(name, path)
        except Exception as e:
            _Distutils.dist_log(e, stderr=True)
        return None

    @staticmethod
    def _dist_str(*args):
        """Return a string to print by log and errors.

        Non-string iterables are rendered recursively as ``(a b c)``. The
        message is prefixed with the function name and line number found
        two frames up the call stack.
        """
        def to_str(arg):
            if not isinstance(arg, str) and hasattr(arg, '__iter__'):
                return '(' + ' '.join(to_str(a) for a in arg) + ')'
            return str(arg)

        # frame 0 is this function and frame 1 is its direct caller, which is
        # normally one of the `dist_*` helpers above.
        stack = inspect.stack()[2]
        start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno)
        out = ' '.join(to_str(a) for a in args)
        return start + out

    def _dist_test_spawn_paths(self, cmd, display=None):
        """
        Fix msvc SDK ENV path same as distutils do
        without it we get c1: fatal error C1356: unable to find mspdbcore.dll
        """
        if not hasattr(self._ccompiler, "_paths"):
            self._dist_test_spawn(cmd)
            return
        old_path = os.getenv("path")
        try:
            os.environ["path"] = self._ccompiler._paths
            self._dist_test_spawn(cmd)
        finally:
            if old_path is None:
                # the variable didn't exist before; assigning None would raise
                os.environ.pop("path", None)
            else:
                os.environ["path"] = old_path

    # Some compilers only warn (and still exit successfully) when a flag is
    # wrong or unsupported; these messages are treated as errors.
    _dist_warn_regex = re.compile(
        ".*("
        "warning D9002|"
        "invalid argument for option"
        ").*"
    )

    @staticmethod
    def _dist_test_spawn(cmd, display=None):
        """Run `cmd` and raise a compiler error if it fails or if its output
        matches `_dist_warn_regex`; return None on success.
        """
        try:
            o = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                        text=True)
            if o and re.match(_Distutils._dist_warn_regex, o):
                _Distutils.dist_error(
                    "Flags in command", cmd ,"aren't supported by the compiler"
                    ", output -> \n%s" % o
                )
        except subprocess.CalledProcessError as exc:
            o = exc.output
            s = exc.returncode
        except OSError as e:
            # the command couldn't be started at all
            o = e
            s = 127
        else:
            return None
        _Distutils.dist_error(
            "Command", cmd, "failed with exit status %d output -> \n%s" % (
            s, o
        ))
|
|
|
# In-memory cache: maps a cache hash to the latest `_Cache` instance that
# was created with that hash.
_share_cache = {}
class _Cache:
    """An abstract class that handles caching functionality, provides two
    levels of caching, in-memory by sharing instance attributes among
    each other and by storing attributes into files.

    **Note**:
        any attributes that start with ``_`` or ``conf_`` will be ignored.

    Parameters
    ----------
    cache_path : str or None
        The path of cache file, if None then cache in file will be disabled.

    *factors :
        The caching factors that need to be utilized next to `conf_cache_factors`.

    Attributes
    ----------
    cache_private : set
        Hold the attributes that need to be skipped from "in-memory cache".

    cache_infile : bool
        Utilized during initializing this class, to determine if the cache
        was able to be loaded from the specified cache path in 'cache_path'.
    """

    # attributes matching this pattern are never cached
    _cache_ignore = re.compile("^(_|conf_)")

    def __init__(self, cache_path=None, *factors):
        self.cache_me = {}
        self.cache_private = set()
        self.cache_infile = False
        self._cache_path = None

        if self.conf_nocache:
            self.dist_log("cache is disabled by `Config`")
            return

        self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors)
        self._cache_path = cache_path
        if cache_path:
            if os.path.exists(cache_path):
                self.dist_log("load cache from file ->", cache_path)
                cache_mod = self.dist_load_module("cache", cache_path)
                if not cache_mod:
                    self.dist_log(
                        "unable to load the cache file as a module",
                        stderr=True
                    )
                elif not hasattr(cache_mod, "hash") or \
                     not hasattr(cache_mod, "data"):
                    self.dist_log("invalid cache file", stderr=True)
                elif self._cache_hash == cache_mod.hash:
                    self.dist_log("hit the file cache")
                    for attr, val in cache_mod.data.items():
                        setattr(self, attr, val)
                    self.cache_infile = True
                else:
                    self.dist_log("miss the file cache")

        if not self.cache_infile:
            # fall back to the attributes of a live instance with the same hash
            other_cache = _share_cache.get(self._cache_hash)
            if other_cache:
                self.dist_log("hit the memory cache")
                for attr, val in other_cache.__dict__.items():
                    if attr in other_cache.cache_private or \
                               re.match(self._cache_ignore, attr):
                        continue
                    setattr(self, attr, val)

        _share_cache[self._cache_hash] = self
        atexit.register(self.cache_flush)

    def __del__(self):
        # Drop this instance from the in-memory cache; identity (not
        # equality) is what matters here.
        for h, o in _share_cache.items():
            if o is self:
                _share_cache.pop(h)
                break

    def cache_flush(self):
        """
        Force update the cache.

        Writes every instance attribute whose name doesn't match
        `_cache_ignore` into the cache file as an importable Python module
        that defines ``hash`` and ``data``. Does nothing when file caching
        is disabled.
        """
        if not self._cache_path:
            return

        self.dist_log("write cache to path ->", self._cache_path)
        cdict = {
            attr: val for attr, val in self.__dict__.items()
            if not re.match(self._cache_ignore, attr)
        }

        d = os.path.dirname(self._cache_path)
        # `d` is empty when the cache lives in the current directory;
        # `exist_ok` avoids a race between checking and creating.
        if d:
            os.makedirs(d, exist_ok=True)

        repr_dict = pprint.pformat(cdict, compact=True)
        header = (
            "# AUTOGENERATED DON'T EDIT\n"
            "# Please make changes to the code generator "
            "(distutils/ccompiler_opt.py)\n"
            "hash = {}\n"
            "data = \\\n"
        ).format(self._cache_hash)
        with open(self._cache_path, "w") as f:
            f.write(header)
            f.write(repr_dict)

    def cache_hash(self, *factors):
        """Return a 32-bit integer hash (sdbm algorithm) computed over the
        string representation of all `factors`, in order.
        """
        chash = 0
        for f in factors:
            for char in str(f):
                chash = ord(char) + (chash << 6) + (chash << 16) - chash
                chash &= 0xFFFFFFFF
        return chash

    @staticmethod
    def me(cb):
        """
        A static method that can be treated as a decorator to
        dynamically cache certain methods.

        Results are stored in the instance attribute ``cache_me``, keyed by
        the string form of the method name and the call arguments.
        """
        import functools

        @functools.wraps(cb)
        def cache_wrap_me(self, *args, **kwargs):
            # good for normal args
            cache_key = str((
                cb.__name__, *args, *kwargs.keys(), *kwargs.values()
            ))
            if cache_key in self.cache_me:
                return self.cache_me[cache_key]
            ccb = cb(self, *args, **kwargs)
            self.cache_me[cache_key] = ccb
            return ccb
        return cache_wrap_me
|
|
|
class _CCompiler: |
|
"""A helper class for `CCompilerOpt` containing all utilities that |
|
related to the fundamental compiler's functions. |
|
|
|
Attributes |
|
---------- |
|
cc_on_x86 : bool |
|
True when the target architecture is 32-bit x86 |
|
cc_on_x64 : bool |
|
True when the target architecture is 64-bit x86 |
|
cc_on_ppc64 : bool |
|
True when the target architecture is 64-bit big-endian powerpc |
|
cc_on_ppc64le : bool |
|
True when the target architecture is 64-bit litle-endian powerpc |
|
cc_on_s390x : bool |
|
True when the target architecture is IBM/ZARCH on linux |
|
cc_on_armhf : bool |
|
True when the target architecture is 32-bit ARMv7+ |
|
cc_on_aarch64 : bool |
|
True when the target architecture is 64-bit Armv8-a+ |
|
cc_on_noarch : bool |
|
True when the target architecture is unknown or not supported |
|
cc_is_gcc : bool |
|
True if the compiler is GNU or |
|
if the compiler is unknown |
|
cc_is_clang : bool |
|
True if the compiler is Clang |
|
cc_is_icc : bool |
|
True if the compiler is Intel compiler (unix like) |
|
cc_is_iccw : bool |
|
True if the compiler is Intel compiler (msvc like) |
|
cc_is_nocc : bool |
|
True if the compiler isn't supported directly, |
|
Note: that cause a fail-back to gcc |
|
cc_has_debug : bool |
|
True if the compiler has debug flags |
|
cc_has_native : bool |
|
True if the compiler has native flags |
|
cc_noopt : bool |
|
True if the compiler has definition 'DISABLE_OPT*', |
|
or 'cc_on_noarch' is True |
|
cc_march : str |
|
The target architecture name, or "unknown" if |
|
the architecture isn't supported |
|
cc_name : str |
|
The compiler name, or "unknown" if the compiler isn't supported |
|
cc_flags : dict |
|
Dictionary containing the initialized flags of `_Config.conf_cc_flags` |
|
""" |
|
def __init__(self):
    """Detect the target architecture, the compiler and the relevant
    command arguments, then initialize the compiler flags.

    Sets every ``cc_on_*``, ``cc_is_*``, ``cc_has_*`` and ``cc_noopt``
    attribute plus ``cc_march``, ``cc_name`` and ``cc_flags``. Nothing is
    done when the attribute ``cc_is_cached`` already exists on the instance.
    """
    if hasattr(self, "cc_is_cached"):
        return

    # Each entry is (attribute, regex matched against the probed text,
    # optional C preprocessor expression that must also compile).
    detect_arch = (
        ("cc_on_x64", ".*(x|x86_|amd)64.*", ""),
        ("cc_on_x86", ".*(win32|x86|i386|i686).*", ""),
        ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*|.*powerpc.*",
                                      "defined(__powerpc64__) && "
                                      "defined(__LITTLE_ENDIAN__)"),
        ("cc_on_ppc64", ".*(powerpc|ppc).*|.*powerpc.*",
                                      "defined(__powerpc64__) && "
                                      "defined(__BIG_ENDIAN__)"),
        ("cc_on_aarch64", ".*(aarch64|arm64).*", ""),
        ("cc_on_armhf", ".*arm.*", "defined(__ARM_ARCH_7__) || "
                                      "defined(__ARM_ARCH_7A__)"),
        ("cc_on_s390x", ".*s390x.*", ""),
        # fallback: the empty regex always applies (undefined platform)
        ("cc_on_noarch", "", ""),
    )
    detect_compiler = (
        ("cc_is_gcc", r".*(gcc|gnu\-g).*", ""),
        ("cc_is_clang", ".*clang.*", ""),
        # must be tested before "cc_is_icc", whose pattern also matches
        # these names
        ("cc_is_iccw", ".*(intelw|intelemw|iccw).*", ""),
        ("cc_is_icc", ".*(intel|icc).*", ""),
        ("cc_is_msvc", ".*msvc.*", ""),
        ("cc_is_fcc", ".*fcc.*", ""),
        # fallback: the empty regex always applies (undefined compiler)
        ("cc_is_nocc", "", ""),
    )
    detect_args = (
        ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*", ""),
        ("cc_has_native",
            ".*(-march=native|-xHost|/QxHost|-mcpu=a64fx).*", ""),
        # a definition such as DISABLE_OPT* was passed to the compiler
        ("cc_noopt", ".*DISABLE_OPT.*", ""),
    )

    dist_info = self.dist_info()
    platform, compiler_info, extra_args = dist_info

    # every detectable attribute starts as False
    for section in (detect_arch, detect_compiler, detect_args):
        for attr, rgex, cexpr in section:
            setattr(self, attr, False)

    # architecture and compiler: only the first matching entry wins
    for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
        for attr, rgex, cexpr in detect:
            if rgex and not re.match(rgex, searchin, re.IGNORECASE):
                continue
            if cexpr and not self.cc_test_cexpr(cexpr):
                continue
            setattr(self, attr, True)
            break

    # arguments: every matching entry is enabled
    for attr, rgex, cexpr in detect_args:
        if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
            continue
        if cexpr and not self.cc_test_cexpr(cexpr):
            continue
        setattr(self, attr, True)

    if self.cc_on_noarch:
        self.dist_log(
            "unable to detect CPU architecture which lead to disable the optimization. "
            f"check dist_info:<<\n{dist_info}\n>>",
            stderr=True
        )
        self.cc_noopt = True

    if self.conf_noopt:
        self.dist_log("Optimization is disabled by the Config", stderr=True)
        self.cc_noopt = True

    if self.cc_is_nocc:
        # mingw can be treated as a gcc, and also xlc even if it based on
        # clang, but still has the same gcc optimization flags.
        self.dist_log(
            "unable to detect compiler type which leads to treating it as GCC. "
            "this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC. "
            f"check dist_info:<<\n{dist_info}\n>>",
            stderr=True
        )
        self.cc_is_gcc = True

    self.cc_march = "unknown"
    for arch in ("x86", "x64", "ppc64", "ppc64le",
                 "armhf", "aarch64", "s390x"):
        if getattr(self, "cc_on_" + arch):
            self.cc_march = arch
            break

    self.cc_name = "unknown"
    for name in ("gcc", "clang", "iccw", "icc", "msvc", "fcc"):
        if getattr(self, "cc_is_" + name):
            self.cc_name = name
            break

    self.cc_flags = {}
    compiler_flags = self.conf_cc_flags.get(self.cc_name)
    if compiler_flags is None:
        self.dist_fatal(
            "undefined flag for compiler '%s', "
            "leave an empty dict instead" % self.cc_name
        )
    for name, flags in compiler_flags.items():
        self.cc_flags[name] = nflags = []
        if flags:
            assert(isinstance(flags, str))
            # keep only the flags that the compiler actually accepts,
            # each one is tested on its own
            for f in flags.split():
                if self.cc_test_flags([f]):
                    nflags.append(f)

    self.cc_is_cached = True
|
|
|
@_Cache.me
def cc_test_flags(self, flags):
    """
    Check whether the compiler accepts the given 'flags'.

    Parameters
    ----------
    flags : list
        Compiler flags to try against the ``test_flags.c`` check file.

    Returns
    -------
    The outcome of ``dist_test()``; truthy when the flags are supported.
    """
    assert isinstance(flags, list)
    self.dist_log("testing flags", flags)
    check_file = os.path.join(self.conf_check_path, "test_flags.c")
    supported = self.dist_test(check_file, flags)
    if supported:
        return supported
    # leave a trace of the failure on stderr before reporting it
    self.dist_log("testing failed", stderr=True)
    return supported
|
|
|
@_Cache.me
def cc_test_cexpr(self, cexpr, flags=[]):
    """
    Check whether a compile-time expression holds for the compiler.

    A small C file that fails with ``#error`` unless 'cexpr' is true is
    written into the temporary directory and handed to ``dist_test()``.

    Parameters
    ----------
    cexpr : str
        C preprocessor expression to evaluate.

    flags : list, optional
        Extra compiler flags passed to the test.

    Returns
    -------
    The outcome of ``dist_test()``; truthy when the expression holds.
    """
    self.dist_log("testing compiler expression", cexpr)
    source_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
    with open(source_path, "w") as out:
        out.write(textwrap.dedent(f"""\
           #if !({cexpr})
               #error "unsupported expression"
           #endif
           int dummy;
        """))
    passed = self.dist_test(source_path, flags)
    if passed:
        return passed
    self.dist_log("testing failed", stderr=True)
    return passed
|
|
|
def cc_normalize_flags(self, flags):
    """
    Drop the conflicts produced by gathering the flags of implied features.

    Parameters
    ----------
    flags : list
        Compiler flags, sorted from the lowest to the highest interest.

    Returns
    -------
    list
        The flags with conflicts filtered out. Compilers that are neither
        gcc/clang/icc nor msvc/iccw get 'flags' back untouched.

    Examples
    --------
    >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod'])
    ['-march=armv8.2-a+fp16+dotprod']

    >>> self.cc_normalize_flags(
        ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2']
    )
    ['-march=core-avx2']
    """
    assert isinstance(flags, list)
    if not (self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc):
        # not a unix-like command line; maybe a windows-like one
        if self.cc_is_msvc or self.cc_is_iccw:
            return self._cc_normalize_win(flags)
        return flags
    return self._cc_normalize_unix(flags)
|
|
|
# Recognizes the flags that select a target CPU/architecture
# (-mcpu=, -march= and the Intel style -x<ARCH>); the last one of
# them in a flags list is the one that gets kept.
_cc_normalize_unix_mrgx = re.compile(r"^(-mcpu=|-march=|-x[A-Z0-9\-])")

# Used as a filter: matches the flags that survive once a target flag was
# found, i.e. everything that is NOT -mcpu=, -march=, -x<ARCH> or a plain
# '-m<name>' without '='; '-mzvector' is explicitly kept.
_cc_normalize_unix_frgx = re.compile(
    r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]|-m[a-z0-9\-\.]*.$))|"
    r"(?:-mzvector)"
)

# Flags of which only the last occurrence is kept.
_cc_normalize_unix_krgx = re.compile(r"^(-mfpu|-mtune)")

# Collects the digits and dots of a flag to build its numeric version.
_cc_normalize_arch_ver = re.compile(r"[0-9.]")
|
def _cc_normalize_unix(self, flags):
    """
    Normalize a list of gcc-like flags sorted from lowest to highest interest.

    Only the last target flag (see `_cc_normalize_unix_mrgx`) is kept; the
    flags before it are filtered through `_cc_normalize_unix_frgx`, and the
    '+subflag' suffixes of earlier flags carrying the same numeric version
    are merged into it. Finally, only the last occurrence of every flag
    matched by `_cc_normalize_unix_krgx` is kept.

    Parameters
    ----------
    flags : list
        Compiler flags; the list itself is not modified.

    Returns
    -------
    list
        Normalized flags ('flags' itself when it holds at most one item).
    """
    def split_arch(flag):
        # '-march=armv8.2-a+fp16fml' -> (8.2, '-march=armv8.2-a', ['fp16fml'])
        head, *extensions = flag.split('+')
        digits = self._cc_normalize_arch_ver.findall(head)
        return float('0' + ''.join(digits)), head, extensions

    if len(flags) <= 1:
        return flags

    # locate the last (highest interest) target flag, if there is one
    arch_pos = None
    for pos in range(len(flags) - 1, -1, -1):
        if self._cc_normalize_unix_mrgx.match(flags[pos]):
            arch_pos = pos
            break

    if arch_pos is not None:
        top_flag = flags[arch_pos]
        below = flags[:arch_pos]
        above = flags[arch_pos + 1:]
        survivors = [
            f for f in below if self._cc_normalize_unix_frgx.search(f)
        ]
        # gather the subflags of the lower flags sharing the same version
        version, arch, extensions = split_arch(top_flag)
        if version > 0 and len(extensions) > 0:
            for lower in below:
                lower_version, _, lower_extensions = split_arch(lower)
                if version == lower_version:
                    extensions = lower_extensions + extensions
            top_flag = arch + '+' + '+'.join(extensions)
        flags = survivors + [top_flag] + above

    # walk backwards so the last occurrence of an overridable flag wins
    seen = set()
    kept_reversed = []
    for flag in reversed(flags):
        hit = self._cc_normalize_unix_krgx.match(flag)
        if hit:
            if hit[0] in seen:
                continue
            seen.add(hit[0])
        kept_reversed.append(flag)
    kept_reversed.reverse()
    return kept_reversed
|
|
|
# Used as a filter: matches every flag that does NOT start with
# '/arch:' or '/Qx:'.
_cc_normalize_win_frgx = re.compile(r"^(?!(/arch\:|/Qx\:))")

# Recognizes the target flags ('/arch...' and '/Qx:...').
_cc_normalize_win_mrgx = re.compile(r"^(/arch|/Qx:)")
|
def _cc_normalize_win(self, flags):
    """
    Normalize a list of msvc-like flags sorted from lowest to highest interest.

    Every '/arch:' and '/Qx:' flag located before the last target flag is
    dropped; the last target flag and whatever follows it are kept as-is.

    Parameters
    ----------
    flags : list
        Compiler flags; the list itself is not modified.

    Returns
    -------
    list
        Normalized flags ('flags' itself when it holds no target flag).
    """
    for pos in range(len(flags) - 1, -1, -1):
        if not self._cc_normalize_win_mrgx.match(flags[pos]):
            continue
        leading = [
            f for f in flags[:pos] if self._cc_normalize_win_frgx.search(f)
        ]
        return leading + flags[pos:]
    return flags
|
|
|
class _Feature:
    """A helper class for `CCompilerOpt` that manages CPU features.

    Attributes
    ----------
    feature_supported : dict
        Dictionary containing all CPU features that are supported
        by the platform, according to the specified values in attribute
        `_Config.conf_features` and `_Config.conf_features_partial()`

    feature_min : set
        The minimum support of CPU features, according to
        the specified values in attribute `_Config.conf_min_features`.
    """
    def __init__(self):
        if hasattr(self, "feature_is_cached"):
            # a previous initialization is already present on the instance
            return
        self.feature_supported = pfeatures = self.conf_features_partial()
        # iterate over a snapshot of the keys, disabled features are
        # popped from the dictionary while looping
        for feature_name in list(pfeatures.keys()):
            feature = pfeatures[feature_name]
            cfeature = self.conf_features[feature_name]
            # inherit every option the partial definition didn't override
            feature.update({
                k: v for k, v in cfeature.items() if k not in feature
            })
            disabled = feature.get("disable")
            if disabled is not None:
                pfeatures.pop(feature_name)
                self.dist_log(
                    "feature '%s' is disabled," % feature_name,
                    disabled, stderr=True
                )
                continue
            # options given as space separated strings are turned into lists
            for option in (
                "implies", "group", "detect", "headers", "flags", "extra_checks"
            ):
                oval = feature.get(option)
                if isinstance(oval, str):
                    feature[option] = oval.split()

        min_f = self.conf_min_features.get(self.cc_march, "")
        self.feature_min = {
            F for F in min_f.upper().split() if F in self.feature_supported
        }

        self.feature_is_cached = True

    def feature_names(self, names=None, force_flags=None, macros=[]):
        """
        Returns a set of CPU feature names that are supported by the
        platform and the **C** compiler.

        Parameters
        ----------
        names : sequence or None, optional
            Specify certain CPU features to test against the **C** compiler.
            If None(default), all currently supported features are tested.
            **Note**: feature names must be in upper-case.

        force_flags : list or None, optional
            If None(default), the default compiler flags of every CPU feature
            will be used during the test.

        macros : list of tuples, optional
            A list of C macro definitions.
        """
        assert (
            names is None or (
                not isinstance(names, str) and
                hasattr(names, "__iter__")
            )
        )
        assert force_flags is None or isinstance(force_flags, list)
        if names is None:
            names = self.feature_supported.keys()
        supported_names = set()
        for f in names:
            if self.feature_is_supported(
                f, force_flags=force_flags, macros=macros
            ):
                supported_names.add(f)
        return supported_names

    def feature_is_exist(self, name):
        """
        Returns True if a certain feature exists and is covered within
        ``_Config.conf_features``.

        Parameters
        ----------
        'name': str
            feature name in uppercase.
        """
        assert name.isupper()
        return name in self.conf_features

    def feature_sorted(self, names, reverse=False):
        """
        Sort a list of CPU features ordered by the lowest interest.

        Parameters
        ----------
        'names': sequence
            sequence of supported feature names in uppercase; an item may
            also be a sequence of names (multi-target).
        'reverse': bool, optional
            If true, the sorted features are reversed. (highest interest)

        Returns
        -------
        list, sorted CPU features
        """
        def sort_cb(k):
            if isinstance(k, str):
                return self.feature_supported[k]["interest"]
            # multiple features: ranked by the highest interest among them
            rank = max([self.feature_supported[f]["interest"] for f in k])
            # and pushed up by the number of the extra features
            rank += len(k) - 1
            return rank
        return sorted(names, reverse=reverse, key=sort_cb)

    def feature_implies(self, names, keep_origins=False):
        """
        Return a set of CPU features that are implied by 'names'

        Parameters
        ----------
        names : str or sequence of str
            CPU feature name(s) in uppercase.

        keep_origins : bool
            if False(default) then the returned set will not contain any
            features from 'names'. This case happens only when two features
            imply each other.

        Examples
        --------
        >>> self.feature_implies("SSE3")
        {'SSE', 'SSE2'}
        >>> self.feature_implies("SSE2")
        {'SSE'}
        >>> self.feature_implies("SSE2", keep_origins=True)
        # 'SSE2' found here since 'SSE' and 'SSE2' imply each other
        {'SSE', 'SSE2'}
        """
        # Features that already started expanding their implications.
        # One set is shared by all the expansions of a single call, it
        # breaks the infinite recursion of features implying each other.
        expanded = set()

        def get_implies(name):
            implies = set()
            d = self.feature_supported[name]
            for i in d.get("implies", []):
                implies.add(i)
                if i in expanded:
                    # already expanded, its implications were collected
                    # (or are being collected) by an earlier expansion
                    continue
                expanded.add(name)
                implies |= get_implies(i)
            return implies

        if isinstance(names, str):
            implies = get_implies(names)
            names = [names]
        else:
            assert hasattr(names, "__iter__")
            implies = set()
            for n in names:
                implies = implies.union(get_implies(n))
        if not keep_origins:
            implies.difference_update(names)
        return implies

    def feature_implies_c(self, names):
        """same as feature_implies() but combining 'names'"""
        if isinstance(names, str):
            names = {names}
        else:
            names = set(names)
        return names.union(self.feature_implies(names))

    def feature_ahead(self, names):
        """
        Return list of features in 'names' after removing any
        implied features and keeping the origins.

        Parameters
        ----------
        'names': sequence
            sequence of CPU feature names in uppercase.

        Returns
        -------
        list of CPU features sorted as-is 'names'

        Examples
        --------
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41"])
        ["SSE41"]
        # assume AVX2 and FMA3 implies each other and AVX2
        # is the highest interest
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
        ["AVX2"]
        # assume AVX2 and FMA3 don't implies each other
        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
        ["AVX2", "FMA3"]
        """
        assert (
            not isinstance(names, str)
            and hasattr(names, '__iter__')
        )
        implies = self.feature_implies(names, keep_origins=True)
        ahead = [n for n in names if n not in implies]
        if len(ahead) == 0:
            # every feature is implied by another one (they imply each
            # other), fall back to the one with the highest interest
            ahead = self.feature_sorted(names, reverse=True)[:1]
        return ahead

    def feature_untied(self, names):
        """
        same as 'feature_ahead()' but only when two features imply each
        other, in which case the one with the highest interest is kept.

        Parameters
        ----------
        'names': sequence
            sequence of CPU feature names in uppercase.

        Returns
        -------
        list of CPU features sorted as-is 'names'

        Examples
        --------
        >>> self.feature_untied(["SSE2", "SSE3", "SSE41"])
        ["SSE2", "SSE3", "SSE41"]
        # assume AVX2 and FMA3 implies each other
        >>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"])
        ["SSE2", "SSE3", "SSE41", "AVX2"]
        """
        assert (
            not isinstance(names, str)
            and hasattr(names, '__iter__')
        )
        final = []
        for n in names:
            implies = self.feature_implies(n)
            # already collected features that 'n' implies and that
            # imply 'n' back
            tied = [
                nn for nn in final
                if nn in implies and n in self.feature_implies(nn)
            ]
            if tied:
                tied = self.feature_sorted(tied + [n])
                if n not in tied[1:]:
                    # 'n' has the lowest interest among the tied ones
                    continue
                final.remove(tied[0])
            final.append(n)
        return final

    def feature_get_til(self, names, keyisfalse):
        """
        same as `feature_implies_c()` but stop collecting implied
        features when feature's option that provided through
        parameter 'keyisfalse' is False, also sorting the returned
        features.
        """
        def til(tnames):
            # sort from the highest to the lowest interest then cut right
            # after the first feature whose option 'keyisfalse' is False
            tnames = self.feature_implies_c(tnames)
            tnames = self.feature_sorted(tnames, reverse=True)
            for i, n in enumerate(tnames):
                if not self.feature_supported[n].get(keyisfalse, True):
                    tnames = tnames[:i+1]
                    break
            return tnames

        if isinstance(names, str) or len(names) <= 1:
            names = til(names)
            # back to the ascending order (lowest interest first)
            names.reverse()
            return names

        names = self.feature_ahead(names)
        names = {t for n in names for t in til(n)}
        return self.feature_sorted(names)

    def feature_detect(self, names):
        """
        Return a list of CPU features that required to be detected
        sorted from the lowest to highest interest.
        """
        names = self.feature_get_til(names, "implies_detect")
        detect = []
        for n in names:
            d = self.feature_supported[n]
            # option "detect" wins over "group", the name itself is the
            # last resort
            detect += d.get("detect", d.get("group", [n]))
        return detect

    @_Cache.me
    def feature_flags(self, names):
        """
        Return a list of CPU features flags sorted from the lowest
        to highest interest.
        """
        names = self.feature_sorted(self.feature_implies_c(names))
        flags = []
        for n in names:
            d = self.feature_supported[n]
            f = d.get("flags", [])
            # skip the flags that the compiler doesn't accept
            if not f or not self.cc_test_flags(f):
                continue
            flags += f
        return self.cc_normalize_flags(flags)

    @_Cache.me
    def feature_test(self, name, force_flags=None, macros=[]):
        """
        Test a certain CPU feature against the compiler through its own
        check file.

        Parameters
        ----------
        name : str
            Supported CPU feature name.

        force_flags : list or None, optional
            If None(default), the returned flags from `feature_flags()`
            will be used.

        macros : list of tuples, optional
            A list of C macro definitions.
        """
        if force_flags is None:
            force_flags = self.feature_flags(name)

        self.dist_log(
            "testing feature '%s' with flags (%s)" % (
            name, ' '.join(force_flags)
        ))
        # the check file of a feature is named after it: cpu_<name>.c
        test_path = os.path.join(
            self.conf_check_path, "cpu_%s.c" % name.lower()
        )
        if not os.path.exists(test_path):
            self.dist_fatal("feature test file is not exist", test_path)

        test = self.dist_test(
            test_path, force_flags + self.cc_flags["werror"], macros=macros
        )
        if not test:
            self.dist_log("testing failed", stderr=True)
        return test

    @_Cache.me
    def feature_is_supported(self, name, force_flags=None, macros=[]):
        """
        Check if a certain CPU feature is supported by the platform and compiler.

        Parameters
        ----------
        name : str
            CPU feature name in uppercase.

        force_flags : list or None, optional
            If None(default), default compiler flags for every CPU feature will
            be used during test.

        macros : list of tuples, optional
            A list of C macro definitions.
        """
        assert name.isupper()
        assert force_flags is None or isinstance(force_flags, list)

        supported = name in self.feature_supported
        if supported:
            # every implied feature has to pass its own test too
            for impl in self.feature_implies(name):
                if not self.feature_test(impl, force_flags, macros=macros):
                    return False
            if not self.feature_test(name, force_flags, macros=macros):
                return False
        return supported

    @_Cache.me
    def feature_can_autovec(self, name):
        """
        check if the feature can be auto-vectorized by the compiler

        Returns the value of option "autovec" when the feature defines it,
        otherwise a bool telling whether at least one of the feature's
        flags is accepted by the compiler (False for a feature without
        flags).
        """
        assert isinstance(name, str)
        d = self.feature_supported[name]
        can = d.get("autovec", None)
        if can is None:
            valid_flags = [
                self.cc_test_flags([f]) for f in d.get("flags", [])
            ]
            # any() of an empty list is already False, no need to leak
            # the empty list itself to the callers
            can = any(valid_flags)
        return can

    @_Cache.me
    def feature_extra_checks(self, name):
        """
        Return a list of supported extra checks after testing them against
        the compiler.

        Parameters
        ----------
        name : str
            CPU feature name in uppercase.
        """
        assert isinstance(name, str)
        d = self.feature_supported[name]
        extra_checks = d.get("extra_checks", [])
        if not extra_checks:
            return []

        self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks)
        flags = self.feature_flags(name)
        available = []
        not_available = []
        for chk in extra_checks:
            # the check file of an extra check is named extra_<check>.c
            test_path = os.path.join(
                self.conf_check_path, "extra_%s.c" % chk.lower()
            )
            if not os.path.exists(test_path):
                self.dist_fatal("extra check file does not exist", test_path)

            is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"])
            if is_supported:
                available.append(chk)
            else:
                not_available.append(chk)

        if not_available:
            self.dist_log("testing failed for checks", not_available, stderr=True)
        return available

    def feature_c_preprocessor(self, feature_name, tabs=0):
        """
        Generate C preprocessor definitions and include headers of a CPU feature.

        Parameters
        ----------
        'feature_name': str
            CPU feature name in uppercase.
        'tabs': int
            if > 0, every generated line is prefixed with that number of tabs.

        Returns
        -------
        str, generated C preprocessor

        Examples
        --------
        >>> self.feature_c_preprocessor("SSE3")
        /** SSE3 **/
        #define NPY_HAVE_SSE3 1
        #include <pmmintrin.h>
        """
        assert feature_name.isupper()
        feature = self.feature_supported.get(feature_name)
        assert feature is not None

        prepr = [
            "/** %s **/" % feature_name,
            "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name)
        ]
        prepr += [
            "#include <%s>" % h for h in feature.get("headers", [])
        ]

        # Work on a copy: extending the list stored in the feature itself
        # would add the extra checks to option "group" on every call.
        extra_defs = list(feature.get("group", []))
        extra_defs += self.feature_extra_checks(feature_name)
        for edef in extra_defs:
            # guarded since the same definition may come from another feature
            prepr += [
                "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
                "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
                "#endif",
            ]

        if tabs > 0:
            prepr = [('\t'*tabs) + l for l in prepr]
        return '\n'.join(prepr)
|
|
|
class _Parse:
    """A helper class that parses the main arguments of `CCompilerOpt`,
    and also parses the configuration statements in dispatch-able sources.

    Parameters
    ----------
    cpu_baseline : str or None
        minimal set of required CPU features or special options.

    cpu_dispatch : str or None
        dispatched set of additional CPU features or special options.

    Special options can be:
        - **MIN**: Enables the minimum CPU features that utilized via `_Config.conf_min_features`
        - **MAX**: Enables all supported CPU features by the Compiler and platform.
        - **NATIVE**: Enables all CPU features that supported by the current machine.
        - **NONE**: Enables nothing
        - **Operand +/-**: remove or add features, useful with options **MAX**, **MIN** and **NATIVE**.
            NOTE: operand + is only added for nominal reason.

    NOTES:
        - Case-insensitive among all CPU features and special options.
        - Comma or space can be used as a separator.
        - If the CPU feature is not supported by the user platform or compiler,
          it will be skipped rather than raising a fatal error.
        - Any specified CPU features to 'cpu_dispatch' will be skipped if its part of CPU baseline features
        - 'cpu_baseline' force enables implied features.

    Attributes
    ----------
    parse_baseline_names : list
        Final CPU baseline's feature names(sorted from low to high)
    parse_baseline_flags : list
        Compiler flags of baseline features
    parse_dispatch_names : list
        Final CPU dispatch-able feature names(sorted from low to high)
    parse_target_groups : dict
        Dictionary containing initialized target groups that configured
        through class attribute `conf_target_groups`.

        The key represents the group name and the value is a tuple
        containing three items :
            - bool, True if group has the 'baseline' option.
            - list, list of CPU features.
            - list, list of extra compiler flags.

    """
    def __init__(self, cpu_baseline, cpu_dispatch):
        # policy name -> (callback when the policy is ON,
        #                 callback when the policy is OFF,
        #                 [policies that are force enabled with it])
        # Assigned before the early return below, so it is always rebuilt.
        self._parse_policies = {
            "KEEP_BASELINE": (
                None, self._parse_policy_not_keepbase,
                []
            ),
            "KEEP_SORT": (
                self._parse_policy_keepsort,
                self._parse_policy_not_keepsort,
                []
            ),
            "MAXOPT": (
                self._parse_policy_maxopt, None,
                []
            ),
            "WERROR": (
                self._parse_policy_werror, None,
                []
            ),
            "AUTOVEC": (
                self._parse_policy_autovec, None,
                ["MAXOPT"]
            ),
        }
        if hasattr(self, "parse_is_cached"):
            # a previous initialization is already present on the instance
            return

        self.parse_baseline_names = []
        self.parse_baseline_flags = []
        self.parse_dispatch_names = []
        self.parse_target_groups = {}

        if self.cc_noopt:
            # optimization is off: ignore both arguments but still
            # initialize the target groups
            cpu_baseline = cpu_dispatch = None

        self.dist_log("check requested baseline")
        if cpu_baseline is not None:
            cpu_baseline = self._parse_arg_features("cpu_baseline", cpu_baseline)
            baseline_names = self.feature_names(cpu_baseline)
            self.parse_baseline_flags = self.feature_flags(baseline_names)
            self.parse_baseline_names = self.feature_sorted(
                self.feature_implies_c(baseline_names)
            )

        self.dist_log("check requested dispatch-able features")
        if cpu_dispatch is not None:
            cpu_dispatch_ = self._parse_arg_features("cpu_dispatch", cpu_dispatch)
            # the features that are already part of the baseline are skipped
            cpu_dispatch = {
                f for f in cpu_dispatch_
                if f not in self.parse_baseline_names
            }
            conflict_baseline = cpu_dispatch_.difference(cpu_dispatch)
            self.parse_dispatch_names = self.feature_sorted(
                self.feature_names(cpu_dispatch)
            )
            if len(conflict_baseline) > 0:
                self.dist_log(
                    "skip features", conflict_baseline, "since its part of baseline"
                )

        self.dist_log("initialize targets groups")
        for group_name, tokens in self.conf_target_groups.items():
            self.dist_log("parse target group", group_name)
            GROUP_NAME = group_name.upper()
            if not tokens or not tokens.strip():
                # empty groups are allowed, while '_parse_target_tokens()'
                # requires one token at least
                self.parse_target_groups[GROUP_NAME] = (
                    False, [], []
                )
                continue
            has_baseline, features, extra_flags = \
                self._parse_target_tokens(tokens)
            self.parse_target_groups[GROUP_NAME] = (
                has_baseline, features, extra_flags
            )

        self.parse_is_cached = True

    def parse_targets(self, source):
        """
        Fetch and parse configuration statements that required for
        defining the targeted CPU features, statements should be declared
        in the top of source in between **C** comment and start
        with a special mark **@targets**.

        Configuration statements are sort of keywords representing
        CPU features names, group of statements and policies, combined
        together to determine the required optimization.

        Parameters
        ----------
        source : str
            the path of **C** source file.

        Returns
        -------
        - bool, True if group has the 'baseline' option
        - list, list of CPU features
        - list, list of extra compiler flags
        """
        self.dist_log("looking for '@targets' inside -> ", source)
        # get lines between /*@targets and */
        with open(source) as fd:
            tokens = ""
            max_to_reach = 1000 # good enough, isn't?
            start_with = "@targets"
            start_pos = -1
            end_with = "*/"
            end_pos = -1
            for current_line, line in enumerate(fd):
                if current_line == max_to_reach:
                    self.dist_fatal("reached the max of lines")
                    break
                if start_pos == -1:
                    start_pos = line.find(start_with)
                    if start_pos == -1:
                        continue
                    # the statements start right after the mark
                    start_pos += len(start_with)
                tokens += line
                end_pos = line.find(end_with)
                if end_pos != -1:
                    # turn the position within the line into a position
                    # within the gathered text
                    end_pos += len(tokens) - len(line)
                    break

        if start_pos == -1:
            self.dist_fatal("expected to find '%s' within a C comment" % start_with)
        if end_pos == -1:
            self.dist_fatal("expected to end with '%s'" % end_with)

        tokens = tokens[start_pos:end_pos]
        return self._parse_target_tokens(tokens)

    # separators of the arguments: spaces and commas are dropped while the
    # operands '+' and '-' are captured as tokens of their own
    _parse_regex_arg = re.compile(r'\s|,|([+-])')
    def _parse_arg_features(self, arg_name, req_features):
        """
        Parse the value of argument 'cpu_baseline' or 'cpu_dispatch'.

        Parameters
        ----------
        arg_name : str
            Name of the argument, only used in the error messages.
        req_features : str
            Requested features and special options, see the class notes.

        Returns
        -------
        set of the final feature names in uppercase.
        """
        if not isinstance(req_features, str):
            self.dist_fatal("expected a string in '%s'" % arg_name)

        final_features = set()
        tokens = list(filter(None, re.split(self._parse_regex_arg, req_features)))
        append = True # adding is the default operation
        for tok in tokens:
            if tok[0] in ("#", "$"):
                self.dist_fatal(
                    arg_name, "target groups and policies "
                    "aren't allowed from arguments, "
                    "only from dispatch-able sources"
                )
            if tok == '+':
                append = True
                continue
            if tok == '-':
                append = False
                continue

            TOK = tok.upper() # upper-case is used internally
            features_to = set()
            if TOK == "NONE":
                pass
            elif TOK == "NATIVE":
                native = self.cc_flags["native"]
                if not native:
                    self.dist_fatal(arg_name,
                        "native option isn't supported by the compiler"
                    )
                features_to = self.feature_names(
                    force_flags=native, macros=[("DETECT_FEATURES", 1)]
                )
            elif TOK == "MAX":
                features_to = self.feature_supported.keys()
            elif TOK == "MIN":
                features_to = self.feature_min
            else:
                if TOK in self.feature_supported:
                    features_to.add(TOK)
                else:
                    # a known feature that isn't supported is silently
                    # skipped, only unknown names are fatal
                    if not self.feature_is_exist(TOK):
                        self.dist_fatal(arg_name,
                            ", '%s' isn't a known feature or option" % tok
                        )
            if append:
                final_features = final_features.union(features_to)
            else:
                final_features = final_features.difference(features_to)

            append = True # an operand only affects the token next to it

        return final_features

    # separators of the target statements: spaces, '*', ',' and '/' are
    # dropped while the parentheses are captured as tokens of their own
    _parse_regex_target = re.compile(r'\s|[*,/]|([()])')
    def _parse_target_tokens(self, tokens):
        """
        Parse the configuration statements of a dispatch-able source or
        of a target group.

        Parameters
        ----------
        tokens : str
            Statements: CPU features, '(multi targets)', '#groups',
            '$policies' and the option 'baseline'.

        Returns
        -------
        - bool, True if the option 'baseline' is present
        - list, list of CPU features
        - list, list of extra compiler flags
        """
        assert isinstance(tokens, str)
        final_targets = [] # to keep it sorted as specified
        extra_flags = []
        has_baseline = False

        skipped = set()
        policies = set()
        multi_target = None

        tokens = list(filter(None, re.split(self._parse_regex_target, tokens)))
        if not tokens:
            self.dist_fatal("expected one token at least")

        for tok in tokens:
            TOK = tok.upper()
            ch = tok[0]
            if ch in ('+', '-'):
                self.dist_fatal(
                    "+/- are 'not' allowed from target's groups or @targets, "
                    "only from cpu_baseline and cpu_dispatch parms"
                )
            elif ch == '$':
                if multi_target is not None:
                    self.dist_fatal(
                        "policies aren't allowed inside multi-target '()'"
                        ", only CPU features"
                    )
                policies.add(self._parse_token_policy(TOK))
            elif ch == '#':
                if multi_target is not None:
                    self.dist_fatal(
                        "target groups aren't allowed inside multi-target '()'"
                        ", only CPU features"
                    )
                has_baseline, final_targets, extra_flags = \
                self._parse_token_group(TOK, has_baseline, final_targets, extra_flags)
            elif ch == '(':
                if multi_target is not None:
                    self.dist_fatal("unclosed multi-target, missing ')'")
                multi_target = set()
            elif ch == ')':
                if multi_target is None:
                    self.dist_fatal("multi-target opener '(' wasn't found")
                targets = self._parse_multi_target(multi_target)
                if targets is None:
                    skipped.add(tuple(multi_target))
                else:
                    # a multi-target of one feature is a plain target
                    if len(targets) == 1:
                        targets = targets[0]
                    if targets and targets not in final_targets:
                        final_targets.append(targets)
                multi_target = None
            else:
                if TOK == "BASELINE":
                    if multi_target is not None:
                        self.dist_fatal("baseline isn't allowed inside multi-target '()'")
                    has_baseline = True
                    continue

                if multi_target is not None:
                    multi_target.add(TOK)
                    continue

                if not self.feature_is_exist(TOK):
                    self.dist_fatal("invalid target name '%s'" % TOK)

                is_enabled = (
                    TOK in self.parse_baseline_names or
                    TOK in self.parse_dispatch_names
                )
                if is_enabled:
                    if TOK not in final_targets:
                        final_targets.append(TOK)
                    continue

                skipped.add(TOK)

        if multi_target is not None:
            self.dist_fatal("unclosed multi-target, missing ')'")
        if skipped:
            self.dist_log(
                "skip targets", skipped,
                "not part of baseline or dispatch-able features"
            )

        final_targets = self.feature_untied(final_targets)

        # Force enable the dependencies of the requested policies. A work
        # list is used so the dependencies of a force-enabled policy get
        # enabled as well.
        pending = list(policies)
        while pending:
            p = pending.pop()
            _, _, deps = self._parse_policies[p]
            for d in deps:
                if d in policies:
                    continue
                self.dist_log(
                    "policy '%s' force enables '%s'" % (
                    p, d
                ))
                policies.add(d)
                pending.append(d)

        # apply the callbacks following the order of the policies table
        for p, (have, nhave, _) in self._parse_policies.items():
            func = None
            if p in policies:
                func = have
                self.dist_log("policy '%s' is ON" % p)
            else:
                func = nhave
            if not func:
                continue
            has_baseline, final_targets, extra_flags = func(
                has_baseline, final_targets, extra_flags
            )

        return has_baseline, final_targets, extra_flags

    def _parse_token_policy(self, token):
        """validate policy token and return the policy name without '$'"""
        if len(token) <= 1 or token[-1:] == token[0]:
            self.dist_fatal("'$' must stuck in the begin of policy name")
        token = token[1:]
        if token not in self._parse_policies:
            self.dist_fatal(
                "'%s' is an invalid policy name, available policies are" % token,
                self._parse_policies.keys()
            )
        return token

    def _parse_token_group(self, token, has_baseline, final_targets, extra_flags):
        """validate group token and merge the group into the passed state"""
        if len(token) <= 1 or token[-1:] == token[0]:
            self.dist_fatal("'#' must stuck in the begin of group name")

        token = token[1:]
        # 'None' as targets marks an unknown group
        ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get(
            token, (False, None, [])
        )
        if gtargets is None:
            self.dist_fatal(
                "'%s' is an invalid target group name, " % token + \
                "available target groups are",
                self.parse_target_groups.keys()
            )
        if ghas_baseline:
            has_baseline = True
        # always keep sorting as specified
        final_targets += [f for f in gtargets if f not in final_targets]
        extra_flags += [f for f in gextra_flags if f not in extra_flags]
        return has_baseline, final_targets, extra_flags

    def _parse_multi_target(self, targets):
        """validate multi targets that defined between parentheses()

        Returns a tuple of the features sorted from the lowest to the
        highest interest, or None when the multi-target has to be skipped.
        """
        if not targets:
            self.dist_fatal("empty multi-target '()'")
        if not all([
            self.feature_is_exist(tar) for tar in targets
        ]):
            self.dist_fatal("invalid target name in multi-target", targets)
        # skipped unless all of its features are enabled
        if not all([
            (
                tar in self.parse_baseline_names or
                tar in self.parse_dispatch_names
            )
            for tar in targets
        ]):
            return None
        targets = self.feature_ahead(targets)
        if not targets:
            return None
        targets = self.feature_sorted(targets)
        targets = tuple(targets) # hashable
        return targets

    def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags):
        """skip all baseline features"""
        skipped = []
        # iterate over a copy since the list is modified while looping
        for tar in final_targets[:]:
            is_base = False
            if isinstance(tar, str):
                is_base = tar in self.parse_baseline_names
            else:
                # multi-target: all of its features are part of the baseline
                is_base = all([
                    f in self.parse_baseline_names
                    for f in tar
                ])
            if is_base:
                skipped.append(tar)
                final_targets.remove(tar)

        if skipped:
            self.dist_log("skip baseline features", skipped)

        return has_baseline, final_targets, extra_flags

    def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags):
        """leave a notice that $keep_sort is on"""
        self.dist_log(
            "policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n"
            "are 'not' sorted depend on the highest interest but "
            "as specified in the dispatch-able source or the extra group"
        )
        return has_baseline, final_targets, extra_flags

    def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags):
        """sorted depend on the highest interest"""
        final_targets = self.feature_sorted(final_targets, reverse=True)
        return has_baseline, final_targets, extra_flags

    def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags):
        """append the compiler optimization flags"""
        if self.cc_has_debug:
            self.dist_log("debug mode is detected, policy 'maxopt' is skipped.")
        elif self.cc_noopt:
            self.dist_log("optimization is disabled, policy 'maxopt' is skipped.")
        else:
            flags = self.cc_flags["opt"]
            if not flags:
                self.dist_log(
                    "current compiler doesn't support optimization flags, "
                    "policy 'maxopt' is skipped", stderr=True
                )
            else:
                extra_flags += flags
        return has_baseline, final_targets, extra_flags

    def _parse_policy_werror(self, has_baseline, final_targets, extra_flags):
        """force warnings to treated as errors"""
        flags = self.cc_flags["werror"]
        if not flags:
            self.dist_log(
                "current compiler doesn't support werror flags, "
                "warnings will 'not' treated as errors", stderr=True
            )
        else:
            self.dist_log("compiler warnings are treated as errors")
            extra_flags += flags
        return has_baseline, final_targets, extra_flags

    def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags):
        """skip features that has no auto-vectorized support by compiler"""
        skipped = []
        # iterate over a copy since the list is modified while looping
        for tar in final_targets[:]:
            if isinstance(tar, str):
                can = self.feature_can_autovec(tar)
            else: # multiple target
                can = all([
                    self.feature_can_autovec(t)
                    for t in tar
                ])
            if not can:
                final_targets.remove(tar)
                skipped.append(tar)

        if skipped:
            self.dist_log("skip non auto-vectorized features", skipped)

        return has_baseline, final_targets, extra_flags
|
|
|
class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): |
|
""" |
|
    A helper class for `CCompiler` that provides extra build options
    to effectively control the compiler optimizations that are directly
    related to CPU features.
|
""" |
|
def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None):
    """
    Initialize every base class in order, then set up the per-instance state.

    Parameters
    ----------
    ccompiler : CCompiler
        Forwarded to `_Distutils.__init__`.
    cpu_baseline : str
        Requested CPU baseline, forwarded to `_Cache.__init__` and
        `_Parse.__init__`. Replaced by "native" when optimization is enabled
        and `cc_has_native` is set.
    cpu_dispatch : str
        Requested dispatch-able features, forwarded the same way.
    cache_path : str or None
        Forwarded to `_Cache.__init__`.
    """
    # The order of these calls matters: each base relies on the state set up
    # by the previous ones (e.g. `dist_info()` is needed by the cache).
    _Config.__init__(self)
    _Distutils.__init__(self, ccompiler)
    _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch)
    _CCompiler.__init__(self)
    _Feature.__init__(self)

    optimizing = not self.cc_noopt
    if optimizing and self.cc_has_native:
        self.dist_log(
            "native flag is specified through environment variables. "
            "force cpu-baseline='native'"
        )
        cpu_baseline = "native"
    _Parse.__init__(self, cpu_baseline, cpu_dispatch)

    # Keep the requested features as they were asked for (after the 'native'
    # override); `report()` prints them later.
    self._requested_baseline = cpu_baseline
    self._requested_dispatch = cpu_dispatch

    # Maps each dispatch-able source to (has_baseline, targets), filled by
    # `try_dispatch()`. An already existing value is reused.
    previous_status = getattr(self, "sources_status", {})
    self.sources_status = previous_status

    # NOTE(review): presumably marks the attribute as private to this
    # instance for the cache machinery; confirm against `_Cache`.
    self.cache_private.add("sources_status")

    # Must stay last: True only when the attribute already existed before
    # this line (presumably restored from the cache; see `is_cached()`).
    self.hit_cache = hasattr(self, "hit_cache")
|
|
|
def is_cached(self):
    """
    Tell whether the class was loaded from the cache file.

    The result is the short-circuit value of ``cache_infile and hit_cache``,
    so callers should rely on its truthiness only.
    """
    from_file = self.cache_infile
    if not from_file:
        return from_file
    return self.hit_cache
|
|
|
def cpu_baseline_flags(self):
    """
    Return the list of the final CPU baseline compiler flags.

    This is the `parse_baseline_flags` attribute itself, not a copy.
    """
    flags = self.parse_baseline_flags
    return flags
|
|
|
def cpu_baseline_names(self):
    """
    Return the list of the final CPU baseline feature names.

    This is the `parse_baseline_names` attribute itself, not a copy.
    """
    names = self.parse_baseline_names
    return names
|
|
|
def cpu_dispatch_names(self):
    """
    Return the list of the final CPU dispatch feature names.

    This is the `parse_dispatch_names` attribute itself, not a copy.
    """
    names = self.parse_dispatch_names
    return names
|
|
|
def try_dispatch(self, sources, src_dir=None, ccompiler=None, **kwargs):
    """
    Compile one or more dispatch-able sources and generate object files,
    together with the abstract C config headers and macros that are
    used later for the final runtime dispatching process.

    Each file in 'sources' is branched into several files depending on
    the special configuration statements declared at the top of the
    source, which name the targeted CPU features. Every branched source
    is then compiled with the proper compiler flags.

    Parameters
    ----------
    sources : list
        Must be a list of dispatch-able sources file paths,
        and configuration statements must be declared inside
        each file.

    src_dir : str
        Path of parent directory for the generated headers and wrapped sources.
        If None(default) the files will generated in-place.

    ccompiler : CCompiler
        Distutils `CCompiler` instance to be used for compilation.
        If None (default), the provided instance during the initialization
        will be used instead.

    **kwargs : any
        Arguments to pass on to the `CCompiler.compile()`

    Returns
    -------
    list : generated object files

    Raises
    ------
    CompileError
        Raises by `CCompiler.compile()` on compiling failure.
    DistutilsError
        Some errors during checking the sanity of configuration statements.

    See Also
    --------
    parse_targets :
        Parsing the configuration statements of dispatch-able sources.
    """
    # flags tuple -> every source that has to be compiled with these flags
    by_flags = {}
    base_flags = self.cpu_baseline_flags()
    include_dirs = kwargs.setdefault("include_dirs", [])

    for src in sources:
        output_dir = os.path.dirname(src)
        if src_dir:
            if not output_dir.startswith(src_dir):
                output_dir = os.path.join(src_dir, output_dir)
            if output_dir not in include_dirs:
                # lets the dispatch-able sources include the config header
                # that is generated next to the wrapped sources
                include_dirs.append(output_dir)

        has_baseline, targets, extra_flags = self.parse_targets(src)
        nochange = self._generate_config(output_dir, src, targets, has_baseline)
        for tar in targets:
            wrapped = self._wrap_target(output_dir, src, tar, nochange=nochange)
            key = tuple(extra_flags + self.feature_flags(tar))
            by_flags.setdefault(key, []).append(wrapped)

        if has_baseline:
            key = tuple(extra_flags + base_flags)
            by_flags.setdefault(key, []).append(src)

        self.sources_status[src] = (has_baseline, targets)

    # Compiling happens only after the loop above, so that all config headers
    # and wrapped sources exist first and the sources sharing the same flags
    # go to the compiler in one call.
    objects = []
    for key, group in by_flags.items():
        objects += self.dist_compile(
            group, list(key), ccompiler=ccompiler, **kwargs
        )
    return objects
|
|
|
def generate_dispatch_header(self, header_path):
    """
    Generate the dispatch header which contains the #definitions and headers
    for platform-specific instruction-sets for the enabled CPU baseline and
    dispatch-able features.

    It's highly recommended to take a look at the generated header
    and at the source files generated via `try_dispatch()`
    in order to get the full picture.

    Parameters
    ----------
    header_path : str
        Path of the header to write. A missing parent directory is created;
        a bare file name is written into the current working directory.
    """
    self.dist_log("generate CPU dispatch header: (%s)" % header_path)

    baseline_names = self.cpu_baseline_names()
    dispatch_names = self.cpu_dispatch_names()

    header_dir = os.path.dirname(header_path)
    # A bare file name has an empty dirname and `os.makedirs('')` raises,
    # so there is nothing to create in that case.
    if header_dir and not os.path.exists(header_dir):
        self.dist_log(
            f"dispatch header dir {header_dir} does not exist, creating it",
            stderr=True
        )
        # exist_ok: tolerate the directory showing up since the check above
        os.makedirs(header_dir, exist_ok=True)

    # One macro call per feature, chained through C line continuations.
    call_fmt = "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
    baseline_calls = ' \\\n'.join([
        call_fmt % (self.conf_c_prefix, name) for name in baseline_names
    ])
    dispatch_calls = ' \\\n'.join([
        call_fmt % (self.conf_c_prefix, name) for name in dispatch_names
    ])
    definitions = textwrap.dedent("""\
        /*
         * AUTOGENERATED DON'T EDIT
         * Please make changes to the code generator (distutils/ccompiler_opt.py)
         */
        #define {pfx}WITH_CPU_BASELINE "{baseline_str}"
        #define {pfx}WITH_CPU_DISPATCH "{dispatch_str}"
        #define {pfx}WITH_CPU_BASELINE_N {baseline_len}
        #define {pfx}WITH_CPU_DISPATCH_N {dispatch_len}
        #define {pfx}WITH_CPU_EXPAND_(X) X
        #define {pfx}WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \\
        {baseline_calls}
        #define {pfx}WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) \\
        {dispatch_calls}
    """).format(
        pfx=self.conf_c_prefix, baseline_str=" ".join(baseline_names),
        dispatch_str=" ".join(dispatch_names),
        baseline_len=len(baseline_names), dispatch_len=len(dispatch_names),
        baseline_calls=baseline_calls, dispatch_calls=dispatch_calls
    )

    # Baseline features are defined unconditionally.
    baseline_pre = ''.join([
        self.feature_c_preprocessor(name, tabs=1) + '\n'
        for name in baseline_names
    ])
    # Dispatch features are only defined inside the matching target.
    target_guard = textwrap.dedent("""\
        #ifdef {pfx}CPU_TARGET_{name}
        {pre}
        #endif /*{pfx}CPU_TARGET_{name}*/
    """)
    dispatch_pre = ''.join([
        target_guard.format(
            pfx=self.conf_c_prefix_, name=name,
            pre=self.feature_c_preprocessor(name, tabs=1)
        )
        for name in dispatch_names
    ])
    features = textwrap.dedent("""\
        /******* baseline features *******/
        {baseline_pre}
        /******* dispatch features *******/
        {dispatch_pre}
    """).format(baseline_pre=baseline_pre, dispatch_pre=dispatch_pre)

    # Everything is rendered before the file is opened, so a failure above
    # cannot leave a truncated header behind.
    with open(header_path, 'w') as fd:
        fd.write(definitions)
        fd.write(features)
|
|
|
def report(self, full=False):
    """
    Build a plain-text summary of the platform, the CPU baseline and the
    CPU dispatch configuration.

    Parameters
    ----------
    full : bool
        When True and at least one target was generated by `try_dispatch()`,
        every generated target gets its own group of rows (implied features,
        flags, extra checks, detection and sources). Otherwise the generated
        targets are summarized in one "Generated" row as ``NAME[count]``.

    Returns
    -------
    str : the report, one row per line, with the column names padded to a
        common width.
    """
    def join_or_none(items):
        return ' '.join(items) if items else "none"

    def pretty(tar):
        # multi-targets are tuples, printed as "(A B)"
        return tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)

    ########## platform ##########
    platform_rows = [
        ("Architecture", "unsupported" if self.cc_on_noarch else self.cc_march),
        ("Compiler", "unix-like" if self.cc_is_nocc else self.cc_name),
    ]

    ########## baseline ##########
    baseline_names = self.cpu_baseline_names()
    baseline_checks = []
    for name in baseline_names:
        baseline_checks += self.feature_extra_checks(name)
    baseline_rows = [
        ("Requested", (
            "optimization disabled" if self.cc_noopt
            else repr(self._requested_baseline)
        )),
        ("Enabled", join_or_none(baseline_names)),
        ("Flags", join_or_none(self.cpu_baseline_flags())),
        ("Extra checks", join_or_none(baseline_checks)),
    ]

    ########## dispatch ##########
    # The "Requested" row belongs to the dispatch section in both cases;
    # it must not leak into the baseline rows when optimization is disabled.
    dispatch_rows = [
        ("Requested", (
            "optimization disabled" if self.cc_noopt
            else repr(self._requested_dispatch)
        )),
        ("Enabled", join_or_none(self.cpu_dispatch_names())),
    ]

    ########## generated ##########
    # target -> sources that were wrapped for it by `try_dispatch()`
    target_sources = {}
    for source, (_, targets) in self.sources_status.items():
        for tar in targets:
            target_sources.setdefault(tar, []).append(source)
    ordered = self.feature_sorted(target_sources)

    if not full or not target_sources:
        generated = ' '.join([
            "%s[%d]" % (pretty(tar), len(target_sources[tar]))
            for tar in ordered
        ])
        dispatch_rows.append(("Generated", generated if generated else "none"))
    else:
        dispatch_rows.append(("Generated", ''))
        for tar in ordered:
            implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
            extra_checks = []
            for name in ((tar,) if isinstance(tar, str) else tar):
                extra_checks += self.feature_extra_checks(name)

            dispatch_rows.append(('', ''))
            dispatch_rows.append((pretty(tar), implies))
            dispatch_rows.append(("Flags", ' '.join(self.feature_flags(tar))))
            dispatch_rows.append(("Extra checks", join_or_none(extra_checks)))
            dispatch_rows.append(("Detect", ' '.join(self.feature_detect(tar))))
            for src in target_sources[tar]:
                dispatch_rows.append(("", src))

    ########## formatting ##########
    # A section with an empty name stands for a blank separator line.
    sections = [
        ("Platform", platform_rows),
        ("", ""),
        ("CPU baseline", baseline_rows),
        ("", ""),
        ("CPU dispatch", dispatch_rows),
    ]
    widths = [len(sec) for sec, _ in sections]
    widths += [len(col) for _, rows in sections for col, _ in rows]
    pad = max(widths)
    tab = ' ' * 2

    text = []
    for sec, rows in sections:
        if not sec:
            text.append("")
            continue
        text.append(sec.ljust(pad) + tab + ': ')
        for col, val in rows:
            text.append(tab + col.ljust(pad) + ': ' + val)

    return '\n'.join(text)
|
|
|
def _wrap_target(self, output_dir, dispatch_src, target, nochange=False): |
|
assert(isinstance(target, (str, tuple))) |
|
if isinstance(target, str): |
|
ext_name = target_name = target |
|
else: |
|
|
|
ext_name = '.'.join(target) |
|
target_name = '__'.join(target) |
|
|
|
wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src)) |
|
wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower()) |
|
if nochange and os.path.exists(wrap_path): |
|
return wrap_path |
|
|
|
self.dist_log("wrap dispatch-able target -> ", wrap_path) |
|
|
|
features = self.feature_sorted(self.feature_implies_c(target)) |
|
target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_ |
|
target_defs = [target_join + f for f in features] |
|
target_defs = '\n'.join(target_defs) |
|
|
|
with open(wrap_path, "w") as fd: |
|
fd.write(textwrap.dedent("""\ |
|
/** |
|
* AUTOGENERATED DON'T EDIT |
|
* Please make changes to the code generator \ |
|
(distutils/ccompiler_opt.py) |
|
*/ |
|
#define {pfx}CPU_TARGET_MODE |
|
#define {pfx}CPU_TARGET_CURRENT {target_name} |
|
{target_defs} |
|
#include "{path}" |
|
""").format( |
|
pfx=self.conf_c_prefix_, target_name=target_name, |
|
path=os.path.abspath(dispatch_src), target_defs=target_defs |
|
)) |
|
return wrap_path |
|
|
|
def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False): |
|
config_path = os.path.basename(dispatch_src) |
|
config_path = os.path.splitext(config_path)[0] + '.h' |
|
config_path = os.path.join(output_dir, config_path) |
|
|
|
cache_hash = self.cache_hash(targets, has_baseline) |
|
try: |
|
with open(config_path) as f: |
|
last_hash = f.readline().split("cache_hash:") |
|
if len(last_hash) == 2 and int(last_hash[1]) == cache_hash: |
|
return True |
|
except OSError: |
|
pass |
|
|
|
os.makedirs(os.path.dirname(config_path), exist_ok=True) |
|
|
|
self.dist_log("generate dispatched config -> ", config_path) |
|
dispatch_calls = [] |
|
for tar in targets: |
|
if isinstance(tar, str): |
|
target_name = tar |
|
else: |
|
target_name = '__'.join([t for t in tar]) |
|
req_detect = self.feature_detect(tar) |
|
req_detect = '&&'.join([ |
|
"CHK(%s)" % f for f in req_detect |
|
]) |
|
dispatch_calls.append( |
|
"\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % ( |
|
self.conf_c_prefix_, req_detect, target_name |
|
)) |
|
dispatch_calls = ' \\\n'.join(dispatch_calls) |
|
|
|
if has_baseline: |
|
baseline_calls = ( |
|
"\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))" |
|
) % self.conf_c_prefix_ |
|
else: |
|
baseline_calls = '' |
|
|
|
with open(config_path, "w") as fd: |
|
fd.write(textwrap.dedent("""\ |
|
// cache_hash:{cache_hash} |
|
/** |
|
* AUTOGENERATED DON'T EDIT |
|
* Please make changes to the code generator (distutils/ccompiler_opt.py) |
|
*/ |
|
#ifndef {pfx}CPU_DISPATCH_EXPAND_ |
|
#define {pfx}CPU_DISPATCH_EXPAND_(X) X |
|
#endif |
|
#undef {pfx}CPU_DISPATCH_BASELINE_CALL |
|
#undef {pfx}CPU_DISPATCH_CALL |
|
#define {pfx}CPU_DISPATCH_BASELINE_CALL(CB, ...) \\ |
|
{baseline_calls} |
|
#define {pfx}CPU_DISPATCH_CALL(CHK, CB, ...) \\ |
|
{dispatch_calls} |
|
""").format( |
|
pfx=self.conf_c_prefix_, baseline_calls=baseline_calls, |
|
dispatch_calls=dispatch_calls, cache_hash=cache_hash |
|
)) |
|
return False |
|
|
|
def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs):
    """
    Create a new instance of 'CCompilerOpt' and generate the dispatch header
    which contains the #definitions and headers of platform-specific
    instruction-sets for the enabled CPU baseline and dispatch-able features.

    The header is only (re)generated when it does not exist yet or when the
    new instance was not loaded from the cache.

    Parameters
    ----------
    compiler : CCompiler instance
    dispatch_hpath : str
        path of the dispatch header

    **kwargs: passed as-is to `CCompilerOpt(...)`

    Returns
    -------
    new instance of CCompilerOpt
    """
    opt = CCompilerOpt(compiler, **kwargs)
    # `is_cached()` is only consulted when the header is already on disk
    header_is_current = os.path.exists(dispatch_hpath) and opt.is_cached()
    if not header_is_current:
        opt.generate_dispatch_header(dispatch_hpath)
    return opt
|
|