Fixup platform FP8 data type query

Browse files

Files changed (2) hide show

torch-ext/quantization/compressed_tensors.py +3 -1
torch-ext/quantization/platforms.py +35 -0

torch-ext/quantization/compressed_tensors.py CHANGED Viewed

@@ -1,8 +1,10 @@
-from typing import Optional, Tuple
 import torch
 from ._ops import ops
 # fp8
 def scaled_fp8_quant(

+from typing import Optional, Union
 import torch
 from ._ops import ops
+from .platforms import current_platform
 # fp8
 def scaled_fp8_quant(

torch-ext/quantization/platforms.py CHANGED Viewed

@@ -27,6 +27,29 @@ class DeviceCapability(NamedTuple):
 class Platform(ABC):
     simple_compile_backend: str = "inductor"
     @classmethod
     @abstractmethod
     def get_device_name(cls, device_id: int = 0) -> str: ...
@@ -51,6 +74,18 @@ class CudaPlatform(Platform):
 class RocmPlatform(Platform):
     @classmethod
     @lru_cache(maxsize=8)
     def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:

 class Platform(ABC):
     simple_compile_backend: str = "inductor"
+    @classmethod
+    def fp8_dtype(cls) -> torch.dtype:
+        """
+        Returns the preferred FP8 type on the current platform.
+        See the documentation for is_fp8_fnuz for details.
+        """
+        return torch.float8_e4m3fn
+    @classmethod
+    def is_fp8_fnuz(cls) -> bool:
+        """
+        Returns whether the preferred FP8 type is FNUZ on the current platform.
+        There are two representations of FP8: OCP FP8 and FNUZ FP8.
+        The OCP specification can be found at https://tinyurl.com/b7jvwpft.
+        The FNUZ specification can be found at https://tinyurl.com/5n6hwwu5.
+        AMD's MI300 and MI325 have native hardware support for FNUZ. All other
+        hardware has converged on the OCP FP8 standard.
+        """
+        return False
     @classmethod
     @abstractmethod
     def get_device_name(cls, device_id: int = 0) -> str: ...
 class RocmPlatform(Platform):
+    @classmethod
+    def fp8_dtype(cls) -> torch.dtype:
+        if cls.is_fp8_fnuz():
+            return torch.float8_e4m3fnuz
+        else:
+            return torch.float8_e4m3fn
+    @classmethod
+    def is_fp8_fnuz(cls) -> bool:
+        # Only device 0 is checked; this assumes MI300 platforms are homogeneous.
+        return "gfx94" in torch.cuda.get_device_properties(0).gcnArchName
     @classmethod
     @lru_cache(maxsize=8)
     def get_device_capability(cls, device_id: int = 0) -> DeviceCapability: