30#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
31#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP8_H_
33#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx1200__) || \
34 defined(__gfx1201__)) && \
35 __HIP_DEVICE_COMPILE__
36#define HIP_FP8_CVT_FAST_PATH 1
38#define HIP_FP8_CVT_FAST_PATH 0
41#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && __HIP_DEVICE_COMPILE__
42#define HIP_FP8_TYPE_OCP 0
43#define HIP_FP8_TYPE_FNUZ 1
44#elif (defined(__gfx1200__) || defined(__gfx1201__)) && __HIP_DEVICE_COMPILE__
45#define HIP_FP8_TYPE_OCP 1
46#define HIP_FP8_TYPE_FNUZ 0
47#elif __HIP_DEVICE_COMPILE__
48#define HIP_FP8_TYPE_FNUZ 0
49#define HIP_FP8_TYPE_OCP 0
51#define HIP_FP8_TYPE_FNUZ 1
52#define HIP_FP8_TYPE_OCP 1
55#if !defined(__HIPCC_RTC__)
56#include <hip/amd_detail/amd_hip_common.h>
60#include "amd_hip_vector_types.h"
61#include "amd_hip_fp16.h"
66#if defined(__HIPCC_RTC__)
67#define __FP8_HOST_DEVICE__ __device__
68#define __FP8_HOST_DEVICE_STATIC__ __FP8_HOST_DEVICE__ static
70#define __FP8_HOST_DEVICE__ __host__ __device__
71#define __FP8_HOST_DEVICE_STATIC__ __FP8_HOST_DEVICE__ static inline
74#define __FP8_HOST__ __host__
76#if !defined(__HIPCC_RTC__)
77static_assert(CHAR_BIT == 8,
"byte size should be of 8 bits");
79static_assert(
sizeof(
unsigned char) == 1);
80static_assert(
sizeof(
unsigned short int) == 2);
81static_assert(
sizeof(
unsigned int) == 4);
126template <
typename T,
bool is_fnuz>
127__FP8_HOST_DEVICE_STATIC__
__hip_fp8_storage_t cast_to_f8(T _x,
int wm,
int we,
bool clip =
false,
128 bool stoch =
false,
unsigned int rng = 0) {
129 constexpr bool is_half = __hip_internal::is_same<T, _Float16>::value;
130 constexpr bool is_float = __hip_internal::is_same<T, float>::value;
131 constexpr bool is_double = __hip_internal::is_same<T, double>::value;
132 static_assert(is_half || is_float || is_double,
"Only half, float and double can be cast to f8");
134 const int mfmt = (
sizeof(T) == 8) ? 52 : ((
sizeof(T) == 4) ? 23 : 10);
135 unsigned long long x;
138 x =
reinterpret_cast<unsigned long long&
>(_x);
139 else if (
sizeof(T) == 4)
140 x =
reinterpret_cast<unsigned int&
>(_x);
142 x =
reinterpret_cast<unsigned short int&
>(_x);
145 unsigned long long head, mantissa;
148 unsigned long long fInf, mask;
150 if (
sizeof(T) == 8) {
151 head = x & 0xFFF0000000000000ull;
152 mantissa = x & 0xFFFFFFFFFFFFFull;
153 exponent = (head >> 52) & 0x7FF;
156 fInf = 0x7FF0000000000000ull;
157 mask = 0x7FFFFFFFFFFFFFFFull;
158 }
else if (
sizeof(T) == 4) {
159 head = x & 0xFF800000;
160 mantissa = x & 0x7FFFFF;
161 exponent = (head >> 23) & 0xFF;
168 mantissa = x & 0x3FF;
169 exponent = (head >> 10) & 0x1F;
175 unsigned int signed_inf = 0;
176 unsigned int nan = 0;
178 signed_inf = clip ? ((sign << 7) + 0x7f) : 0x80;
182 signed_inf = (sign << 7) + (clip ? 0x7e : 0x7f);
184 signed_inf = (sign << 7) + (clip ? 0x7b : 0x7c);
186 nan = (sign << 7) + 0x7f;
189 unsigned long long ifmax = 0;
190 if (
sizeof(T) == 8) {
192 ifmax = 0x40EC000000000000ull;
195 ifmax = 0x406E000000000000ull;
197 ifmax = 0x407C000000000000ull;
200 }
else if (
sizeof(T) == 4) {
222 if ((x & fInf) == fInf) {
223 if (is_fnuz)
return signed_inf;
224 return mantissa != 0 ? nan : signed_inf;
227 if ((x & mask) > ifmax) {
242 const int f8_bias = (1 << (we - 1)) - 1 + (is_fnuz ? 1 : 0);
243 const int f8_denormal_act_exponent = 1 - f8_bias;
248 int act_exponent, f8_exponent, exponent_diff;
257 act_exponent = exponent - bias + 1;
258 exponent_diff = f8_denormal_act_exponent -
261 act_exponent = exponent - bias;
262 if (act_exponent <= f8_denormal_act_exponent) {
268 exponent_diff = f8_denormal_act_exponent - act_exponent;
273 mantissa += (1ull << mfmt);
276 bool midpoint = (mantissa & ((1ull << (mfmt - wm + exponent_diff)) - 1)) ==
277 (1ull << (mfmt - wm + exponent_diff - 1));
284 if (exponent_diff > 0)
285 mantissa >>= exponent_diff;
286 else if (exponent_diff == -1)
287 mantissa <<= -exponent_diff;
288 bool implicit_one = mantissa & (1ull << mfmt);
291 (act_exponent + exponent_diff) + f8_bias - (implicit_one ? 0 : 1);
294 unsigned long long drop_mask = (1ull << (mfmt - wm)) - 1;
296 mantissa & (1ull << (mfmt - wm));
298 (stoch ? rng : (midpoint ? (odd ? mantissa : mantissa - 1ull) : mantissa)) & drop_mask;
301 if (f8_exponent == 0) {
302 if ((1ull << mfmt) & mantissa) {
306 if ((1ull << (mfmt + 1)) & mantissa) {
312 mantissa >>= (mfmt - wm);
315 const int max_exp = (1 << we) - 1;
316 if (f8_exponent > max_exp) {
318 mantissa = (1 << wm) - 1;
319 f8_exponent = max_exp;
325 if (f8_exponent == 0 && mantissa == 0)
return is_fnuz ? 0 : (sign << 7);
326 mantissa &= (1 << wm) - 1;
327 return (sign << 7) | (f8_exponent << wm) | mantissa;
332template <
typename T,
bool is_fnuz>
333__FP8_HOST_DEVICE_STATIC__ T cast_from_f8(
__hip_fp8_storage_t x,
int wm,
int we,
bool clip =
false) {
334 constexpr bool is_half = __hip_internal::is_same<T, _Float16>::value;
335 constexpr bool is_float = __hip_internal::is_same<T, float>::value;
336 constexpr bool is_double = __hip_internal::is_same<T, double>::value;
337 static_assert(is_half || is_float || is_double,
"only half, float and double are supported");
339 constexpr int weo = is_half ? 5 : (is_float ? 8 : 11);
340 constexpr int wmo = is_half ? 10 : (is_float ? 23 : 52);
342 T fInf, fNegInf, fNaN, fNeg0, fmax, fmin;
344 const unsigned short int ihInf = 0x7C00;
345 const unsigned short int ihNegInf = 0xFC00;
346 const unsigned short int ihNaN = 0x7C01;
347 const unsigned short int ihNeg0 = 0x8000;
349 const unsigned short int ifmax = 0x7B00;
350 const unsigned short int ifmin = 0xFB00;
351 fInf =
reinterpret_cast<const _Float16&
>(ihInf);
352 fNegInf =
reinterpret_cast<const _Float16&
>(ihNegInf);
353 fNaN =
reinterpret_cast<const _Float16&
>(ihNaN);
354 fNeg0 =
reinterpret_cast<const _Float16&
>(ihNeg0);
355 fmax =
reinterpret_cast<const _Float16&
>(ifmax);
356 fmin =
reinterpret_cast<const _Float16&
>(ifmin);
357 }
else if (is_float) {
358 const unsigned int ifInf = 0x7F800000;
359 const unsigned int ifNegInf = 0xFF800000;
360 const unsigned int ifNaN = 0x7F800001;
361 const unsigned int ifNeg0 = 0x80000000;
363 const unsigned int ifmax = 0x47600000;
364 const unsigned int ifmin = 0xC7600000;
365 fInf =
reinterpret_cast<const float&
>(ifInf);
366 fNegInf =
reinterpret_cast<const float&
>(ifNegInf);
367 fNaN =
reinterpret_cast<const float&
>(ifNaN);
368 fNeg0 =
reinterpret_cast<const float&
>(ifNeg0);
369 fmax =
reinterpret_cast<const float&
>(ifmax);
370 fmin =
reinterpret_cast<const float&
>(ifmin);
371 }
else if (is_double) {
372 const unsigned long long ifInf = 0x7FF0000000000000ull;
373 const unsigned long long ifNegInf = 0xFFF0000000000000ull;
374 const unsigned long long ifNaN = 0x7FF0000000000001ull;
375 const unsigned long long ifNeg0 = 0x8000000000000000ull;
377 const unsigned long long ifmax = 0x40EC000000000000ull;
378 const unsigned long long ifmin = 0xC0EC000000000000ull;
379 fInf =
reinterpret_cast<const double&
>(ifInf);
380 fNegInf =
reinterpret_cast<const double&
>(ifNegInf);
381 fNaN =
reinterpret_cast<const double&
>(ifNaN);
382 fNeg0 =
reinterpret_cast<const double&
>(ifNeg0);
383 fmax =
reinterpret_cast<const double&
>(ifmax);
384 fmin =
reinterpret_cast<const double&
>(ifmin);
391 unsigned long long sign = x >> 7;
392 unsigned long long mantissa = x & ((1 << wm) - 1);
393 int exponent = (x & 0x7F) >> wm;
403 if ((x & 0x7F) == 0x7F) {
406 }
else if ((x & 0x7C) == 0x7C) {
407 if ((x & 0x3) == 0) {
409 return sign ? fmin : fmax;
411 return sign ? fNegInf : fInf;
417 typename __hip_internal::conditional<
418 sizeof(T) == 2,
unsigned short int,
419 typename __hip_internal::conditional<
sizeof(T) == 4,
unsigned int,
420 unsigned long long>::type>::type retval;
422 if (we == 5 && is_half && !is_fnuz) {
424 return reinterpret_cast<const T&
>(retval);
427 const int exp_low_cutoff = (1 << (weo - 1)) - (1 << (we - 1)) + 1 - (is_fnuz ? 1 : 0);
431#if __HIP_DEVICE_COMPILE__
433 int sh = 1 + __clz(mantissa) - (32 - wm);
435 int sh = 1 + __builtin_clz(mantissa) - (32 - wm);
439 mantissa &= ((1ull << wm) - 1);
441 exponent += exp_low_cutoff - 1;
442 mantissa <<= wmo - wm;
446 mantissa |= 1 << wmo;
447 mantissa >>= 1 - exponent;
452 retval = (sign << 15) | (exponent << 10) | mantissa;
453 else if (
sizeof(T) == 4)
454 retval = (sign << 31) | (exponent << 23) | mantissa;
456 retval = (sign << 63) | (static_cast<unsigned long long>(exponent) << 52) | mantissa;
457 return reinterpret_cast<const T&
>(retval);
460#if HIP_FP8_CVT_FAST_PATH
463template <
bool stochastic_rounding = false>
466 unsigned int rng = 0) {
471 unsigned char i8val[4];
474 unsigned int ival = 0;
479 if ((val.i32val & 0x7F800000) != 0x7F800000) {
480 val.fval = __builtin_amdgcn_fmed3f(val.fval, 240.0, -240.0);
483 if ((val.i32val & 0x7F800000) != 0x7F800000) {
484 val.fval = __builtin_amdgcn_fmed3f(val.fval, 448.0, -448.0);
487 if ((val.i32val & 0x7F800000) != 0x7F800000) {
488 val.fval = __builtin_amdgcn_fmed3f(val.fval, 57344.0, -57344.0);
493 if (stochastic_rounding) {
495 ? __builtin_amdgcn_cvt_sr_fp8_f32(val.fval, rng, ival, 0)
496 : __builtin_amdgcn_cvt_sr_bf8_f32(val.fval, rng, ival, 0);
498 i8data = val.i8val[0];
501 ? __builtin_amdgcn_cvt_pk_fp8_f32(val.fval, val.fval, ival,
false)
502 : __builtin_amdgcn_cvt_pk_bf8_f32(val.fval, val.fval, ival,
false);
504 i8data = val.i8val[0];
512 static_assert(
sizeof(
float2) ==
sizeof(
unsigned int[2]));
513 static_assert(
sizeof(
float2) ==
sizeof(
unsigned short[4]));
515 unsigned int i32val[2];
516 unsigned short i16val[4];
523 if ((f2val.i32val[0] & 0x7F800000) != 0x7F800000) {
524 f2val.fval.x = __builtin_amdgcn_fmed3f(f2val.fval.x, 240.0, -240.0);
526 if ((f2val.i32val[1] & 0x7F800000) != 0x7F800000) {
527 f2val.fval.y = __builtin_amdgcn_fmed3f(f2val.fval.x, 240.0, -240.0);
530 if ((f2val.i32val[0] & 0x7F800000) != 0x7F800000) {
531 f2val.fval.x = __builtin_amdgcn_fmed3f(f2val.fval.x, 448.0, -448.0);
533 if ((f2val.i32val[1] & 0x7F800000) != 0x7F800000) {
534 f2val.fval.y = __builtin_amdgcn_fmed3f(f2val.fval.x, 448.0, -448.0);
537 if ((f2val.i32val[0] & 0x7F800000) != 0x7F800000) {
538 f2val.fval.x = __builtin_amdgcn_fmed3f(f2val.fval.x, 57344.0, -57344.0);
540 if ((f2val.i32val[1] & 0x7F800000) != 0x7F800000) {
541 f2val.fval.y = __builtin_amdgcn_fmed3f(f2val.fval.x, 57344.0, -57344.0);
547 ? __builtin_amdgcn_cvt_pk_fp8_f32(v.x, v.y, 0,
false)
548 : __builtin_amdgcn_cvt_pk_bf8_f32(v.x, v.y, 0,
false);
557 unsigned char i8val[4];
562 ? __builtin_amdgcn_cvt_f32_fp8(val.i32val, 0)
563 : __builtin_amdgcn_cvt_f32_bf8(val.i32val, 0);
571 unsigned short i16val[2];
576 ? __builtin_amdgcn_cvt_pk_f32_fp8(val.i32val,
false)
577 : __builtin_amdgcn_cvt_pk_f32_bf8(val.i32val,
false);
578 return float2{f2[0], f2[1]};
586 return static_cast<unsigned char>(a) == 0x80;
591 return (type ==
__HIP_E4M3) ? ((a & 0x7f) == 0x7f)
598 return (type ==
__HIP_E5M2) ? (a & 0x7f) == 0x7c :
false;
613#if HIP_FP8_CVT_FAST_PATH
614 return internal::cast_to_f8_from_f32<false>(f, sat ==
__HIP_SATFINITE, type);
619 return internal::cast_to_f8<float, true>(f, wm, we, sat ==
__HIP_SATFINITE);
624 return internal::cast_to_f8<float, false>(f, wm, we, sat ==
__HIP_SATFINITE);
640#if HIP_FP8_CVT_FAST_PATH
641 return internal::cast_to_f8x2_from_f32x2(f2, sat ==
__HIP_SATFINITE, type);
662 return internal::cast_to_f8<double, true>(d, wm, we, sat ==
__HIP_SATFINITE);
667 return internal::cast_to_f8<double, false>(d, wm, we, sat ==
__HIP_SATFINITE);
697 float fval = __hip_bfloat16(hr);
712 float2 f2 = __hip_bfloat162(hr);
728 return __half_raw{internal::cast_from_f8<_Float16, true>(x, wm, we)};
733 return __half_raw{internal::cast_from_f8<_Float16, false>(x, wm, we)};
746 __half2 ret(
static_cast<__half
>(
787 constexpr static unsigned int __we = 4;
788 constexpr static unsigned int __wm = 3;
800 __default_interpret)) {}
809 __default_interpret)) {}
818 __default_interpret)) {}
827 __default_interpret)) {}
836 __default_interpret)) {}
845 __default_interpret)) {}
870 __default_interpret)) {}
879 __default_interpret)) {}
890 __FP8_HOST_DEVICE__
operator __half()
const {
892 __FP8_HOST__
operator __half()
const {
899 __FP8_HOST_DEVICE__
operator __hip_bfloat16()
const {
901 __FP8_HOST__
operator __hip_bfloat16()
const {
904 return __hip_bfloat16(f);
909 __FP8_HOST_DEVICE__
operator bool()
const {
911 __FP8_HOST__
operator bool()
const {
914 return !(
static_cast<unsigned short>(__x) == 0);
919 __FP8_HOST_DEVICE__
operator char()
const {
921 __FP8_HOST__
operator char()
const {
923 if (internal::hip_fp8_fnuz_is_nan(__x)) {
927 auto fval = internal::cast_from_f8<float, true>(__x, __wm, __we);
928 auto llval =
static_cast<long long>(fval);
929 if (llval <= CHAR_MIN) {
931 }
else if (llval >= CHAR_MAX) {
934 return static_cast<char>(fval);
939 __FP8_HOST_DEVICE__
operator double()
const {
941 __FP8_HOST__
operator double()
const {
943 return internal::cast_from_f8<double, true>(__x, __wm, __we);
948 __FP8_HOST_DEVICE__
operator float()
const {
950 __FP8_HOST__
operator float()
const {
952#if HIP_FP8_CVT_FAST_PATH
953 return internal::cast_to_f32_from_f8(__x, __default_interpret);
955 return internal::cast_from_f8<float, true>(__x, __wm, __we);
961 __FP8_HOST_DEVICE__
operator int()
const {
963 __FP8_HOST__
operator int()
const {
965 if (internal::hip_fp8_fnuz_is_nan(__x)) {
970 return static_cast<int>(fval);
975 __FP8_HOST_DEVICE__
operator long int()
const {
977 __FP8_HOST__
operator long int()
const {
979 if (internal::hip_fp8_fnuz_is_nan(__x)) {
984 return static_cast<long>(fval);
989 __FP8_HOST_DEVICE__
operator long long int()
const {
991 __FP8_HOST__
operator long long int()
const {
993 if (internal::hip_fp8_fnuz_is_nan(__x)) {
998 return static_cast<long long>(fval);
1002#if HIP_FP8_TYPE_FNUZ
1003 __FP8_HOST_DEVICE__
operator short int()
const {
1005 __FP8_HOST__
operator short int()
const {
1007 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1012 auto llval =
static_cast<long long>(fval);
1013 if (llval <= SHRT_MIN) {
1015 }
else if (llval >= SHRT_MAX) {
1018 return static_cast<short>(fval);
1022#if HIP_FP8_TYPE_FNUZ
1023 __FP8_HOST_DEVICE__
operator signed char()
const {
1025 __FP8_HOST__
operator signed char()
const {
1027 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1032 auto llval =
static_cast<long long>(fval);
1033 if (llval <= SCHAR_MIN) {
1035 }
else if (llval >= SCHAR_MAX) {
1038 return static_cast<signed char>(fval);
1042#if HIP_FP8_TYPE_FNUZ
1043 __FP8_HOST_DEVICE__
operator unsigned char()
const {
1045 __FP8_HOST__
operator unsigned char()
const {
1047 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1052 auto llval =
static_cast<long long>(fval);
1055 }
else if (llval >= UCHAR_MAX) {
1058 return static_cast<unsigned char>(fval);
1062#if HIP_FP8_TYPE_FNUZ
1063 __FP8_HOST_DEVICE__
operator unsigned int()
const {
1065 __FP8_HOST__
operator unsigned int()
const {
1067 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1072 auto llval =
static_cast<long long>(fval);
1076 return static_cast<unsigned int>(fval);
1080#if HIP_FP8_TYPE_FNUZ
1081 __FP8_HOST_DEVICE__
operator unsigned long int()
const {
1083 __FP8_HOST__
operator unsigned long int()
const {
1085 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1090 auto llval =
static_cast<long long>(fval);
1094 return static_cast<unsigned long>(fval);
1098#if HIP_FP8_TYPE_FNUZ
1099 __FP8_HOST_DEVICE__
operator unsigned long long int()
const {
1101 __FP8_HOST__
operator unsigned long long int()
const {
1103 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1108 auto llval =
static_cast<long long>(fval);
1112 return static_cast<unsigned long long>(fval);
1116#if HIP_FP8_TYPE_FNUZ
1117 __FP8_HOST_DEVICE__
operator unsigned short int()
const {
1119 __FP8_HOST__
operator unsigned short int()
const {
1121 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1126 auto llval =
static_cast<long long>(fval);
1130 return static_cast<unsigned short>(fval);
1142 static constexpr unsigned int __we = 4;
1143 static constexpr unsigned int __wm = 3;
1146#if HIP_FP8_TYPE_FNUZ
1154#if HIP_FP8_TYPE_FNUZ
1162#if HIP_FP8_TYPE_FNUZ
1170#if HIP_FP8_TYPE_FNUZ
1178#if HIP_FP8_TYPE_FNUZ
1185#if HIP_FP8_TYPE_FNUZ
1186 __FP8_HOST_DEVICE__
operator __half2()
const {
1188 __FP8_HOST__
operator __half2()
const {
1194#if HIP_FP8_TYPE_FNUZ
1195 __FP8_HOST_DEVICE__
operator float2()
const {
1197 __FP8_HOST__
operator float2()
const {
1199#if HIP_FP8_CVT_FAST_PATH
1200 return internal::cast_to_f32x2_from_f8x2(__x, __default_interpret);
1218 static constexpr unsigned int __we = 4;
1219 static constexpr unsigned int __wm = 3;
1222#if HIP_FP8_TYPE_FNUZ
1229 val.x, __default_saturation, __default_interpret)) |
1231 val.y, __default_saturation, __default_interpret))
1234 val.z, __default_saturation, __default_interpret))
1237 val.w, __default_saturation, __default_interpret))
1241#if HIP_FP8_TYPE_FNUZ
1248 val.x, __default_saturation, __default_interpret)) |
1250 val.y, __default_saturation, __default_interpret))
1253 val.z, __default_saturation, __default_interpret))
1256 val.w, __default_saturation, __default_interpret))
1260#if HIP_FP8_TYPE_FNUZ
1266 reinterpret_cast<unsigned short>(
1268 reinterpret_cast<unsigned short>(
1273#if HIP_FP8_TYPE_FNUZ
1280 high, __default_saturation, __default_interpret)) |
1282 low, __default_saturation, __default_interpret))
1286#if HIP_FP8_TYPE_FNUZ
1293#if HIP_FP8_TYPE_FNUZ
1294 __FP8_HOST_DEVICE__
operator float4()
const {
1296 __FP8_HOST__
operator float4()
const {
1301#if HIP_FP8_CVT_FAST_PATH
1302 float2 high = internal::cast_to_f32x2_from_f8x2(fp8x2_high, __default_interpret);
1303 float2 low = internal::cast_to_f32x2_from_f8x2(fp8x2_low, __default_interpret);
1305 float2 high =
float2(internal::cast_from_f8<float, true>(
1307 internal::cast_from_f8<float, true>(
1309 float2 low =
float2(internal::cast_from_f8<float, true>(
1311 internal::cast_from_f8<float, true>(
1314 return float4(low.x, low.y, high.x, high.y);
1326 static constexpr unsigned int __we = 5;
1327 static constexpr unsigned int __wm = 2;
1334#if HIP_FP8_TYPE_FNUZ
1340 __default_interpret)) {}
1343#if HIP_FP8_TYPE_FNUZ
1349 __default_interpret)) {}
1352#if HIP_FP8_TYPE_FNUZ
1358 __default_interpret)) {}
1361#if HIP_FP8_TYPE_FNUZ
1367 __default_interpret)) {}
1370#if HIP_FP8_TYPE_FNUZ
1376 __default_interpret)) {}
1379#if HIP_FP8_TYPE_FNUZ
1385 __default_interpret)) {}
1388#if HIP_FP8_TYPE_FNUZ
1396#if HIP_FP8_TYPE_FNUZ
1404#if HIP_FP8_TYPE_FNUZ
1410 __default_interpret)) {}
1413#if HIP_FP8_TYPE_FNUZ
1419 __default_interpret)) {}
1422#if HIP_FP8_TYPE_FNUZ
1429#if HIP_FP8_TYPE_FNUZ
1430 __FP8_HOST_DEVICE__
operator float()
const {
1432 __FP8_HOST__
operator float()
const {
1434#if HIP_FP8_CVT_FAST_PATH
1435 return internal::cast_to_f32_from_f8(__x, __default_interpret);
1437 return internal::cast_from_f8<float, true>(__x, __wm, __we);
1442#if HIP_FP8_TYPE_FNUZ
1443 __FP8_HOST_DEVICE__
operator __half()
const {
1445 __FP8_HOST__
operator __half()
const {
1451#if HIP_FP8_TYPE_FNUZ
1452 __FP8_HOST_DEVICE__
operator __hip_bfloat16()
const {
1454 __FP8_HOST__
operator __hip_bfloat16()
const {
1457 return __hip_bfloat16(f);
1461#if HIP_FP8_TYPE_FNUZ
1462 __FP8_HOST_DEVICE__
operator bool()
const {
1464 __FP8_HOST__
operator bool()
const {
1467 return !(
static_cast<unsigned short>(__x) == 0);
1471#if HIP_FP8_TYPE_FNUZ
1472 __FP8_HOST_DEVICE__
operator char()
const {
1474 __FP8_HOST__
operator char()
const {
1476 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1481 auto llval =
static_cast<long long>(fval);
1482 if (llval <= CHAR_MIN) {
1484 }
else if (llval >= CHAR_MAX) {
1487 return static_cast<char>(fval);
1491#if HIP_FP8_TYPE_FNUZ
1492 __FP8_HOST_DEVICE__
operator double()
const {
1494 __FP8_HOST__
operator double()
const {
1496 return internal::cast_from_f8<double, true>(__x, __wm, __we);
1500#if HIP_FP8_TYPE_FNUZ
1501 __FP8_HOST_DEVICE__
operator int()
const {
1503 __FP8_HOST__
operator int()
const {
1505 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1510 return static_cast<int>(fval);
1514#if HIP_FP8_TYPE_FNUZ
1515 __FP8_HOST_DEVICE__
operator long int()
const {
1517 __FP8_HOST__
operator long int()
const {
1519 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1524 return static_cast<long>(fval);
1528#if HIP_FP8_TYPE_FNUZ
1529 __FP8_HOST_DEVICE__
operator long long int()
const {
1531 __FP8_HOST__
operator long long int()
const {
1533 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1538 return static_cast<long long>(fval);
1542#if HIP_FP8_TYPE_FNUZ
1543 __FP8_HOST_DEVICE__
operator short int()
const {
1545 __FP8_HOST__
operator short int()
const {
1547 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1552 auto llval =
static_cast<long long>(fval);
1553 if (llval <= SHRT_MIN) {
1555 }
else if (llval >= SHRT_MAX) {
1558 return static_cast<short>(fval);
1562#if HIP_FP8_TYPE_FNUZ
1563 __FP8_HOST_DEVICE__
operator signed char()
const {
1565 __FP8_HOST__
operator signed char()
const {
1567 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1572 auto llval =
static_cast<long long>(fval);
1573 if (llval <= SCHAR_MIN) {
1575 }
else if (llval >= SCHAR_MAX) {
1578 return static_cast<signed char>(fval);
1582#if HIP_FP8_TYPE_FNUZ
1583 __FP8_HOST_DEVICE__
operator unsigned char()
const {
1585 __FP8_HOST__
operator unsigned char()
const {
1587 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1592 auto llval =
static_cast<long long>(fval);
1595 }
else if (llval >= UCHAR_MAX) {
1598 return static_cast<unsigned char>(fval);
1602#if HIP_FP8_TYPE_FNUZ
1603 __FP8_HOST_DEVICE__
operator unsigned int()
const {
1605 __FP8_HOST__
operator unsigned int()
const {
1607 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1612 auto llval =
static_cast<long long>(fval);
1616 return static_cast<unsigned int>(fval);
1620#if HIP_FP8_TYPE_FNUZ
1621 __FP8_HOST_DEVICE__
operator unsigned long int()
const {
1623 __FP8_HOST__
operator unsigned long int()
const {
1625 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1630 auto llval =
static_cast<long long>(fval);
1634 return static_cast<unsigned long>(fval);
1638#if HIP_FP8_TYPE_FNUZ
1639 __FP8_HOST_DEVICE__
operator unsigned long long int()
const {
1641 __FP8_HOST__
operator unsigned long long int()
const {
1643 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1648 auto llval =
static_cast<long long>(fval);
1652 return static_cast<unsigned long long>(fval);
1656#if HIP_FP8_TYPE_FNUZ
1657 __FP8_HOST_DEVICE__
operator unsigned short int()
const {
1659 __FP8_HOST__
operator unsigned short int()
const {
1661 if (internal::hip_fp8_fnuz_is_nan(__x)) {
1666 auto llval =
static_cast<long long>(fval);
1670 return static_cast<unsigned short>(fval);
1682 static constexpr unsigned int __we = 5;
1683 static constexpr unsigned int __wm = 2;
1686#if HIP_FP8_TYPE_FNUZ
1694#if HIP_FP8_TYPE_FNUZ
1702#if HIP_FP8_TYPE_FNUZ
1710#if HIP_FP8_TYPE_FNUZ
1718#if HIP_FP8_TYPE_FNUZ
1725#if HIP_FP8_TYPE_FNUZ
1726 __FP8_HOST_DEVICE__
operator __half2()
const {
1728 __FP8_HOST__
operator __half2()
const {
1734#if HIP_FP8_TYPE_FNUZ
1735 __FP8_HOST_DEVICE__
operator float2()
const {
1737 __FP8_HOST__
operator float2()
const {
1739#if HIP_FP8_CVT_FAST_PATH
1740 return internal::cast_to_f32x2_from_f8x2(__x, __default_interpret);
1758 static constexpr unsigned int __we = 5;
1759 static constexpr unsigned int __wm = 2;
1762#if HIP_FP8_TYPE_FNUZ
1769 val.x, __default_saturation, __default_interpret)) |
1771 val.y, __default_saturation, __default_interpret))
1774 val.z, __default_saturation, __default_interpret))
1777 val.w, __default_saturation, __default_interpret))
1781#if HIP_FP8_TYPE_FNUZ
1788 val.x, __default_saturation, __default_interpret)) |
1790 val.y, __default_saturation, __default_interpret))
1793 val.z, __default_saturation, __default_interpret))
1796 val.w, __default_saturation, __default_interpret))
1800#if HIP_FP8_TYPE_FNUZ
1806 reinterpret_cast<unsigned short>(
1808 reinterpret_cast<unsigned short>(
1813#if HIP_FP8_TYPE_FNUZ
1820 high, __default_saturation, __default_interpret)) |
1822 low, __default_saturation, __default_interpret))
1826#if HIP_FP8_TYPE_FNUZ
1833#if HIP_FP8_TYPE_FNUZ
1834 __FP8_HOST_DEVICE__
operator float4()
const {
1836 __FP8_HOST__
operator float4()
const {
1841#if HIP_FP8_CVT_FAST_PATH
1842 float2 high = internal::cast_to_f32x2_from_f8x2(fp8x2_high, __default_interpret);
1843 float2 low = internal::cast_to_f32x2_from_f8x2(fp8x2_low, __default_interpret);
1845 float2 high =
float2(internal::cast_from_f8<float, true>(
1847 internal::cast_from_f8<float, true>(
1849 float2 low =
float2(internal::cast_from_f8<float, true>(
1851 internal::cast_from_f8<float, true>(
1854 return float4(low.x, low.y, high.x, high.y);
1866 constexpr static unsigned int __we = 4;
1867 constexpr static unsigned int __wm = 3;
1879 __default_interpret)) {}
1888 __default_interpret)) {}
1893 __default_interpret)) {}
1902 __default_interpret)) {}
1911 __default_interpret)) {}
1920 __default_interpret)) {}
1945 __default_interpret)) {}
1954 __default_interpret)) {}
1966__FP8_HOST_DEVICE__
operator __half()
const {
1968__FP8_HOST__
operator __half()
const {
1975__FP8_HOST_DEVICE__
operator __hip_bfloat16()
const {
1977__FP8_HOST__
operator __hip_bfloat16()
const {
1980 return __hip_bfloat16(f);
1985__FP8_HOST_DEVICE__
operator bool()
const {
1987__FP8_HOST__
operator bool()
const {
1990 return !(
static_cast<unsigned short>(__x) == 0 ||
static_cast<unsigned short>(__x) == 0x80);
1995__FP8_HOST_DEVICE__
operator char()
const {
1997__FP8_HOST__
operator char()
const {
1999 if (internal::hip_fp8_ocp_is_nan(__x,__default_interpret)) {
2003 auto fval = internal::cast_from_f8<float, false>(__x, __wm, __we);
2004 auto llval =
static_cast<long long>(fval);
2005 if (llval <= CHAR_MIN) {
2007 }
else if (llval >= CHAR_MAX) {
2010 return static_cast<char>(fval);
2015__FP8_HOST_DEVICE__
operator double()
const {
2017__FP8_HOST__
operator double()
const {
2019 return internal::cast_from_f8<double, false>(__x, __wm, __we);
2024__FP8_HOST_DEVICE__
operator float()
const {
2026__FP8_HOST__
operator float()
const {
2028#if HIP_FP8_CVT_FAST_PATH
2029 return internal::cast_to_f32_from_f8(__x, __default_interpret);
2031 return internal::cast_from_f8<float, false>(__x, __wm, __we);
2037__FP8_HOST_DEVICE__
operator int()
const {
2039__FP8_HOST__
operator int()
const {
2041 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2046 return static_cast<int>(fval);
2051__FP8_HOST_DEVICE__
operator long int()
const {
2053__FP8_HOST__
operator long int()
const {
2055 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2060 return static_cast<long>(fval);
2065__FP8_HOST_DEVICE__
operator long long int()
const {
2067__FP8_HOST__
operator long long int()
const {
2069 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2074 return static_cast<long long>(fval);
2079__FP8_HOST_DEVICE__
operator short int()
const {
2081__FP8_HOST__
operator short int()
const {
2083 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2088 auto llval =
static_cast<long long>(fval);
2089 if (llval <= SHRT_MIN) {
2091 }
else if (llval >= SHRT_MAX) {
2094 return static_cast<short>(fval);
2099__FP8_HOST_DEVICE__
operator signed char()
const {
2101__FP8_HOST__
operator signed char()
const {
2103 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2108 auto llval =
static_cast<long long>(fval);
2109 if (llval <= SCHAR_MIN) {
2111 }
else if (llval >= SCHAR_MAX) {
2114 return static_cast<signed char>(fval);
2119__FP8_HOST_DEVICE__
operator unsigned char()
const {
2121__FP8_HOST__
operator unsigned char()
const {
2123 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2128 auto llval =
static_cast<long long>(fval);
2131 }
else if (llval >= UCHAR_MAX) {
2134 return static_cast<unsigned char>(fval);
2139__FP8_HOST_DEVICE__
operator unsigned int()
const {
2141__FP8_HOST__
operator unsigned int()
const {
2143 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2148 auto llval =
static_cast<long long>(fval);
2152 return static_cast<unsigned int>(fval);
2157__FP8_HOST_DEVICE__
operator unsigned long int()
const {
2159__FP8_HOST__
operator unsigned long int()
const {
2161 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2166 auto llval =
static_cast<long long>(fval);
2170 return static_cast<unsigned long>(fval);
2175__FP8_HOST_DEVICE__
operator unsigned long long int()
const {
2177__FP8_HOST__
operator unsigned long long int()
const {
2179 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2184 auto llval =
static_cast<long long>(fval);
2188 return static_cast<unsigned long long>(fval);
2193__FP8_HOST_DEVICE__
operator unsigned short int()
const {
2195__FP8_HOST__
operator unsigned short int()
const {
2197 if (internal::hip_fp8_ocp_is_nan(__x,__default_interpret)) {
2202 auto llval =
static_cast<long long>(fval);
2206 return static_cast<unsigned short>(fval);
2218 static constexpr unsigned int __we = 4;
2219 static constexpr unsigned int __wm = 3;
2263__FP8_HOST_DEVICE__
operator __half2()
const {
2265__FP8_HOST__
operator __half2()
const {
2274__FP8_HOST__
operator float2()
const {
2276#if HIP_FP8_CVT_FAST_PATH
2277 return internal::cast_to_f32x2_from_f8x2(__x, __default_interpret);
2280 internal::cast_from_f8<float, false>(
static_cast<__hip_fp8_storage_t>(__x >> 8), __wm, __we));
2293 static constexpr unsigned int __we = 4;
2294 static constexpr unsigned int __wm = 3;
2305 val.x, __default_saturation, __default_interpret)) |
2307 val.y, __default_saturation, __default_interpret))
2310 val.z, __default_saturation, __default_interpret))
2313 val.w, __default_saturation, __default_interpret))
2324 val.x, __default_saturation, __default_interpret)) |
2326 val.y, __default_saturation, __default_interpret))
2329 val.z, __default_saturation, __default_interpret))
2332 val.w, __default_saturation, __default_interpret))
2339__FP8_HOST__
__hip_fp8x4_e4m3(
const __hip_bfloat162 low,
const __hip_bfloat162 high)
2342 reinterpret_cast<unsigned short>(
2344 reinterpret_cast<unsigned short>(
2356 high, __default_saturation, __default_interpret)) |
2358 low, __default_saturation, __default_interpret))
2372__FP8_HOST__
operator float4()
const {
2377#if HIP_FP8_CVT_FAST_PATH
2378 float2 high = internal::cast_to_f32x2_from_f8x2(fp8x2_high, __default_interpret);
2379 float2 low = internal::cast_to_f32x2_from_f8x2(fp8x2_low, __default_interpret);
2381 float2 high =
float2(internal::cast_from_f8<float, false>(
2383 internal::cast_from_f8<float, false>(
2385 float2 low =
float2(internal::cast_from_f8<float, false>(
2387 internal::cast_from_f8<float, false>(
2390 return float4(low.x, low.y, high.x, high.y);
2402 static constexpr unsigned int __we = 5;
2403 static constexpr unsigned int __wm = 2;
2417 __default_interpret)) {}
2426 __default_interpret)) {}
2435 __default_interpret)) {}
2444 __default_interpret)) {}
2453 __default_interpret)) {}
2462 __default_interpret)) {}
2487 __default_interpret)) {}
2496 __default_interpret)) {}
2507__FP8_HOST_DEVICE__
operator float()
const {
2509__FP8_HOST__
operator float()
const {
2511#if HIP_FP8_CVT_FAST_PATH
2512 return internal::cast_to_f32_from_f8(__x, __default_interpret);
2514 return internal::cast_from_f8<float, false>(__x, __wm, __we, __default_saturation ==
__HIP_SATFINITE);
2520__FP8_HOST_DEVICE__
operator __half()
const {
2522__FP8_HOST__
operator __half()
const {
2529__FP8_HOST_DEVICE__
operator __hip_bfloat16()
const {
2531__FP8_HOST__
operator __hip_bfloat16()
const {
2534 return __hip_bfloat16(f);
2539__FP8_HOST_DEVICE__
operator bool()
const {
2541__FP8_HOST__
operator bool()
const {
2544 return !(
static_cast<unsigned short>(__x) == 0 ||
static_cast<unsigned short>(__x) == 0x80);
2549__FP8_HOST_DEVICE__
operator char()
const {
2551__FP8_HOST__
operator char()
const {
2553 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2558 auto llval =
static_cast<long long>(fval);
2559 if (llval <= CHAR_MIN) {
2561 }
else if (llval >= CHAR_MAX) {
2564 return static_cast<char>(fval);
2569__FP8_HOST_DEVICE__
operator double()
const {
2571__FP8_HOST__
operator double()
const {
2573 return internal::cast_from_f8<double, false>(__x, __wm, __we, __default_saturation ==
__HIP_SATFINITE);
2578__FP8_HOST_DEVICE__
operator int()
const {
2580__FP8_HOST__
operator int()
const {
2582 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2587 return static_cast<int>(fval);
2592__FP8_HOST_DEVICE__
operator long int()
const {
2594__FP8_HOST__
operator long int()
const {
2596 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2601 return static_cast<long>(fval);
2606__FP8_HOST_DEVICE__
operator long long int()
const {
2608__FP8_HOST__
operator long long int()
const {
2610 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2615 return static_cast<long long>(fval);
2620__FP8_HOST_DEVICE__
operator short int()
const {
2622__FP8_HOST__
operator short int()
const {
2624 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2629 auto llval =
static_cast<long long>(fval);
2630 if (llval <= SHRT_MIN) {
2632 }
else if (llval >= SHRT_MAX) {
2635 return static_cast<short>(fval);
2640__FP8_HOST_DEVICE__
operator signed char()
const {
2642__FP8_HOST__
operator signed char()
const {
2644 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2649 auto llval =
static_cast<long long>(fval);
2650 if (llval <= SCHAR_MIN) {
2652 }
else if (llval >= SCHAR_MAX) {
2655 return static_cast<signed char>(fval);
2660__FP8_HOST_DEVICE__
operator unsigned char()
const {
2662__FP8_HOST__
operator unsigned char()
const {
2664 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2669 auto llval =
static_cast<long long>(fval);
2672 }
else if (llval >= UCHAR_MAX) {
2675 return static_cast<unsigned char>(fval);
2680__FP8_HOST_DEVICE__
operator unsigned int()
const {
2682__FP8_HOST__
operator unsigned int()
const {
2684 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2689 auto llval =
static_cast<long long>(fval);
2693 return static_cast<unsigned int>(fval);
2698__FP8_HOST_DEVICE__
operator unsigned long int()
const {
2700__FP8_HOST__
operator unsigned long int()
const {
2702 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2707 auto llval =
static_cast<long long>(fval);
2711 return static_cast<unsigned long>(fval);
2716__FP8_HOST_DEVICE__
operator unsigned long long int()
const {
2718__FP8_HOST__
operator unsigned long long int()
const {
2720 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2725 auto llval =
static_cast<long long>(fval);
2729 return static_cast<unsigned long long>(fval);
2734__FP8_HOST_DEVICE__
operator unsigned short int()
const {
2736__FP8_HOST__
operator unsigned short int()
const {
2738 if (internal::hip_fp8_ocp_is_nan(__x, __default_interpret)) {
2743 auto llval =
static_cast<long long>(fval);
2747 return static_cast<unsigned short>(fval);
2759 static constexpr unsigned int __we = 5;
2760 static constexpr unsigned int __wm = 2;
2804__FP8_HOST_DEVICE__
operator __half2()
const {
2806__FP8_HOST__
operator __half2()
const {
2815__FP8_HOST__
operator float2()
const {
2817#if HIP_FP8_CVT_FAST_PATH
2818 return internal::cast_to_f32x2_from_f8x2(__x, __default_interpret);
2834 static constexpr unsigned int __we = 5;
2835 static constexpr unsigned int __wm = 2;
2845 val.x, __default_saturation, __default_interpret)) |
2847 val.y, __default_saturation, __default_interpret))
2850 val.z, __default_saturation, __default_interpret))
2853 val.w, __default_saturation, __default_interpret))
2864 val.x, __default_saturation, __default_interpret)) |
2866 val.y, __default_saturation, __default_interpret))
2869 val.z, __default_saturation, __default_interpret))
2872 val.w, __default_saturation, __default_interpret))
2879__FP8_HOST__
__hip_fp8x4_e5m2(
const __hip_bfloat162 low,
const __hip_bfloat162 high)
2882 reinterpret_cast<unsigned short>(
2884 reinterpret_cast<unsigned short>(
2896 high, __default_saturation, __default_interpret)) |
2898 low, __default_saturation, __default_interpret))
2912__FP8_HOST__
operator float4()
const {
2917#if HIP_FP8_CVT_FAST_PATH
2918 float2 high = internal::cast_to_f32x2_from_f8x2(fp8x2_high, __default_interpret);
2919 float2 low = internal::cast_to_f32x2_from_f8x2(fp8x2_low, __default_interpret);
2921 float2 high =
float2(internal::cast_from_f8<float, false>(
2923 internal::cast_from_f8<float, false>(
2925 float2 low =
float2(internal::cast_from_f8<float, false>(
2927 internal::cast_from_f8<float, false>(
2930 return float4(low.x, low.y, high.x, high.y);
hip_bf16.h provides struct for __hip_bfloat16 types
__hip_saturation_t
Describes saturation behavior.
Definition amd_hip_fp8.h:96
@ __HIP_SATFINITE
Definition amd_hip_fp8.h:98
@ __HIP_NOSAT
Definition amd_hip_fp8.h:97
__FP8_HOST_DEVICE_STATIC__ __half2_raw __hip_cvt_fp8x2_to_halfraw2(const __hip_fp8x2_storage_t x, const __hip_fp8_interpretation_t type)
convert __hip_fp8x2_storage_t to __half2_raw
Definition amd_hip_fp8.h:745
__hip_fp8_interpretation_t
Describes FP8 interpretation.
Definition amd_hip_fp8.h:86
@ __HIP_E4M3_FNUZ
Definition amd_hip_fp8.h:89
@ __HIP_E5M2
Definition amd_hip_fp8.h:88
@ __HIP_E4M3
Definition amd_hip_fp8.h:87
@ __HIP_E5M2_FNUZ
Definition amd_hip_fp8.h:90
__FP8_HOST_DEVICE_STATIC__ __hip_fp8x2_storage_t __hip_cvt_double2_to_fp8x2(const double2 d2, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert double2 to __hip_fp8x2_storage_t
Definition amd_hip_fp8.h:679
__FP8_HOST_DEVICE_STATIC__ __hip_fp8_storage_t __hip_cvt_double_to_fp8(const double d, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert double to __hip_fp8_storage_t
Definition amd_hip_fp8.h:657
__FP8_HOST_DEVICE_STATIC__ __hip_fp8x2_storage_t __hip_cvt_halfraw2_to_fp8x2(const __half2_raw x, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert __half2_raw to __hip_fp8x2_storage_t
Definition amd_hip_fp8.h:774
unsigned short int __hip_fp8x2_storage_t
type to store two fp8 numbers
Definition amd_hip_fp8.h:112
__FP8_HOST_DEVICE_STATIC__ __hip_fp8_storage_t __hip_cvt_halfraw_to_fp8(const __half_raw x, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert __half_raw to __hip_fp8_storage_t
Definition amd_hip_fp8.h:761
__FP8_HOST_DEVICE_STATIC__ __hip_fp8_storage_t __hip_cvt_float_to_fp8(const float f, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert float to __hip_fp8_storage_t
Definition amd_hip_fp8.h:611
__FP8_HOST_DEVICE_STATIC__ __hip_fp8_storage_t __hip_cvt_bfloat16raw_to_fp8(const __hip_bfloat16_raw hr, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert __hip_bfloat16_raw to __hip_fp8_storage_t
Definition amd_hip_fp8.h:695
unsigned int __hip_fp8x4_storage_t
type to store four fp8 numbers
Definition amd_hip_fp8.h:119
__FP8_HOST_DEVICE_STATIC__ __hip_fp8x2_storage_t __hip_cvt_bfloat16raw2_to_fp8x2(const __hip_bfloat162_raw hr, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert double2 to __hip_fp8x2_storage_t
Definition amd_hip_fp8.h:710
unsigned char __hip_fp8_storage_t
type to store single fp8 number
Definition amd_hip_fp8.h:105
__FP8_HOST_DEVICE_STATIC__ __half_raw __hip_cvt_fp8_to_halfraw(const __hip_fp8_storage_t x, const __hip_fp8_interpretation_t type)
convert __hip_fp8_storage_t to __half_raw
Definition amd_hip_fp8.h:724
__FP8_HOST_DEVICE_STATIC__ __hip_fp8x2_storage_t __hip_cvt_float2_to_fp8x2(const float2 f2, const __hip_saturation_t sat, const __hip_fp8_interpretation_t type)
convert float2 to __hip_fp8x2_storage_t
Definition amd_hip_fp8.h:638
struct representing single fp8 number with e4m3 interpretation
Definition amd_hip_fp8.h:783
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const unsigned int val)
Definition amd_hip_fp8.h:831
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz()=default
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const double f)
Definition amd_hip_fp8.h:849
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const __half f)
Definition amd_hip_fp8.h:874
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const short int val)
Definition amd_hip_fp8.h:813
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const float f)
Definition amd_hip_fp8.h:857
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const __hip_bfloat16 f)
Definition amd_hip_fp8.h:865
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const long int val)
Definition amd_hip_fp8.h:795
static constexpr __hip_saturation_t __default_saturation
raw storage of fp8 number
Definition amd_hip_fp8.h:785
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const unsigned long int val)
Definition amd_hip_fp8.h:822
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const unsigned short int val)
Definition amd_hip_fp8.h:840
__FP8_HOST_DEVICE__ __hip_fp8_e4m3_fnuz(const int val)
Definition amd_hip_fp8.h:804
struct representing two fp8 numbers with e4m3 interpretation
Definition amd_hip_fp8.h:1138
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3_fnuz(const float2 val)
Definition amd_hip_fp8.h:1155
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3_fnuz(const double2 val)
Definition amd_hip_fp8.h:1147
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3_fnuz(const __half2 val)
Definition amd_hip_fp8.h:1171
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3_fnuz()=default
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3_fnuz(const __hip_bfloat162 val)
Definition amd_hip_fp8.h:1163
struct representing four fp8 numbers with e4m3 interpretation
Definition amd_hip_fp8.h:1214
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3_fnuz(const __hip_bfloat162 low, const __hip_bfloat162 high)
Definition amd_hip_fp8.h:1261
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3_fnuz()=default
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3_fnuz(const double4 val)
Definition amd_hip_fp8.h:1223
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3_fnuz(const __half2 low, const __half2 high)
Definition amd_hip_fp8.h:1274
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3_fnuz(const float4 val)
Definition amd_hip_fp8.h:1242
struct representing one fp8 number with e5m2 interpretation
Definition amd_hip_fp8.h:1322
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const unsigned short int val)
Definition amd_hip_fp8.h:1380
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz()=default
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const __hip_bfloat16 f)
Definition amd_hip_fp8.h:1405
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const long int val)
Definition amd_hip_fp8.h:1335
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const unsigned int val)
Definition amd_hip_fp8.h:1371
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const __half f)
Definition amd_hip_fp8.h:1414
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const int val)
Definition amd_hip_fp8.h:1344
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const double f)
Definition amd_hip_fp8.h:1389
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const short int val)
Definition amd_hip_fp8.h:1353
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const float f)
Definition amd_hip_fp8.h:1397
__FP8_HOST_DEVICE__ __hip_fp8_e5m2_fnuz(const unsigned long int val)
Definition amd_hip_fp8.h:1362
struct representing two fp8 numbers with e5m2 interpretation
Definition amd_hip_fp8.h:1678
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2_fnuz(const float2 val)
Definition amd_hip_fp8.h:1695
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2_fnuz(const __half2 val)
Definition amd_hip_fp8.h:1711
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2_fnuz(const __hip_bfloat162 val)
Definition amd_hip_fp8.h:1703
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2_fnuz(const double2 val)
Definition amd_hip_fp8.h:1687
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2_fnuz()=default
struct representing four fp8 numbers with e5m2 interpretation
Definition amd_hip_fp8.h:1754
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2_fnuz(const __hip_bfloat162 low, const __hip_bfloat162 high)
Definition amd_hip_fp8.h:1801
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2_fnuz(const float4 val)
Definition amd_hip_fp8.h:1782
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2_fnuz(const __half2 low, const __half2 high)
Definition amd_hip_fp8.h:1814
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2_fnuz(const double4 val)
Definition amd_hip_fp8.h:1763
struct representing ocp fp8 numbers with e4m3 interpretation
Definition amd_hip_fp8.h:1862
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const __hip_bfloat16 f)
Definition amd_hip_fp8.h:1940
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const long int val)
Definition amd_hip_fp8.h:1874
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const unsigned short int val)
Definition amd_hip_fp8.h:1915
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const float f)
Definition amd_hip_fp8.h:1932
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const short int val)
Definition amd_hip_fp8.h:1891
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const __half f)
Definition amd_hip_fp8.h:1949
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const unsigned long int val)
Definition amd_hip_fp8.h:1897
__FP8_HOST_DEVICE__ __hip_fp8_e4m3()=default
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const int val)
Definition amd_hip_fp8.h:1883
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const unsigned int val)
Definition amd_hip_fp8.h:1906
__FP8_HOST_DEVICE__ __hip_fp8_e4m3(const double f)
Definition amd_hip_fp8.h:1924
struct representing two ocp fp8 numbers with e4m3 interpretation
Definition amd_hip_fp8.h:2214
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3(const float2 val)
Definition amd_hip_fp8.h:2232
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3()=default
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3(const __half2 val)
Definition amd_hip_fp8.h:2248
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3(const __hip_bfloat162 val)
Definition amd_hip_fp8.h:2240
__FP8_HOST_DEVICE__ __hip_fp8x2_e4m3(const double2 val)
Definition amd_hip_fp8.h:2224
struct representing four ocp fp8 numbers with e4m3 interpretation
Definition amd_hip_fp8.h:2289
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3(const double4 val)
Definition amd_hip_fp8.h:2299
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3(const float4 val)
Definition amd_hip_fp8.h:2318
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3()=default
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3(const __half2 low, const __half2 high)
Definition amd_hip_fp8.h:2350
__FP8_HOST_DEVICE__ __hip_fp8x4_e4m3(const __hip_bfloat162 low, const __hip_bfloat162 high)
Definition amd_hip_fp8.h:2337
struct representing ocp fp8 numbers with e5m2 interpretation
Definition amd_hip_fp8.h:2398
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const int val)
Definition amd_hip_fp8.h:2421
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const short int val)
Definition amd_hip_fp8.h:2430
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const unsigned int val)
Definition amd_hip_fp8.h:2448
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const float f)
Definition amd_hip_fp8.h:2474
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const unsigned short int val)
Definition amd_hip_fp8.h:2457
__FP8_HOST_DEVICE__ __hip_fp8_e5m2()=default
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const unsigned long int val)
Definition amd_hip_fp8.h:2439
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const long int val)
Definition amd_hip_fp8.h:2412
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const double f)
Definition amd_hip_fp8.h:2466
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const __hip_bfloat16 f)
Definition amd_hip_fp8.h:2482
__FP8_HOST_DEVICE__ __hip_fp8_e5m2(const __half f)
Definition amd_hip_fp8.h:2491
struct representing two ocp fp8 numbers with e5m2 interpretation
Definition amd_hip_fp8.h:2755
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2(const __half2 val)
Definition amd_hip_fp8.h:2789
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2(const double2 val)
Definition amd_hip_fp8.h:2765
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2()=default
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2(const float2 val)
Definition amd_hip_fp8.h:2773
__FP8_HOST_DEVICE__ __hip_fp8x2_e5m2(const __hip_bfloat162 val)
Definition amd_hip_fp8.h:2781
struct representing four ocp fp8 numbers with e5m2 interpretation
Definition amd_hip_fp8.h:2830
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2(const __hip_bfloat162 low, const __hip_bfloat162 high)
Definition amd_hip_fp8.h:2877
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2(const double4 val)
Definition amd_hip_fp8.h:2839
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2(const __half2 low, const __half2 high)
Definition amd_hip_fp8.h:2890
__FP8_HOST_DEVICE__ __hip_fp8x4_e5m2(const float4 val)
Definition amd_hip_fp8.h:2858
Definition amd_hip_vector_types.h:2035
Definition amd_hip_vector_types.h:2042
Definition amd_hip_vector_types.h:2072
Definition amd_hip_vector_types.h:2079
Definition hip_fp16_gcc.h:7
Definition hip_fp16_gcc.h:11