24#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
25#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
27#if defined(__HIPCC_RTC__)
28 #define __HOST_DEVICE__ __device__
30 #define __HOST_DEVICE__ __host__ __device__
31 #include <hip/amd_detail/amd_hip_common.h>
34 #if defined(__cplusplus)
36 #include <type_traits>
41#if defined(__clang__) && defined(__HIP__)
42 typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
46 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
55 static_assert(
sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
65 #if defined(__cplusplus)
66 #if !defined(__HIPCC_RTC__)
67 #include "hip_fp16_math_fwd.h"
68 #include "amd_hip_vector_types.h"
70 #include "amd_device_functions.h"
71 #include "amd_warp_functions.h"
75 template<>
struct is_floating_point<_Float16> : std::true_type {};
// Shorthand for `typename std::enable_if<cond, T>::type`.
//   cond - compile-time condition; when false the alias has no type, which
//          removes the template using it from overload resolution.
//   T    - resulting type when cond is true (defaults to void).
template <bool cond, typename T = void>
using Enable_if_t = typename std::enable_if<cond, T>::type;
85 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
96 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
98 __half(
decltype(data) x) : data{x} {}
101 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
103 __half(T x) : data{static_cast<_Float16>(x)} {}
106 __half(
const __half&) =
default;
108 __half(__half&&) =
default;
113 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
115 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
117 __half(T x) : data{static_cast<_Float16>(x)} {}
122 __half& operator=(
const __half&) =
default;
124 __half& operator=(__half&&) =
default;
132 volatile __half& operator=(
const __half_raw& x)
volatile
137 volatile __half& operator=(
const volatile __half_raw& x)
volatile
147 volatile __half& operator=(
__half_raw&& x)
volatile
152 volatile __half& operator=(
volatile __half_raw&& x)
volatile
157 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
160 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
162 __half& operator=(T x)
164 data =
static_cast<_Float16
>(x);
170 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
172 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
174 __half& operator=(T x)
176 data =
static_cast<_Float16
>(x);
181 #if !defined(__HIP_NO_HALF_OPERATORS__)
207 __half&
operator++() { ++data;
return *
this; }
216 __half&
operator--() { --data;
return *
this; }
227 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
230 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
232 operator T()
const {
return data; }
242 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
244 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
246 operator T()
const {
return data; }
249 #if !defined(__HIP_NO_HALF_OPERATORS__)
251 __half
operator+()
const {
return *
this; }
256 tmp.data = -tmp.data;
262 #if !defined(__HIP_NO_HALF_OPERATORS__)
266 __half
operator+(
const __half& x,
const __half& y)
268 return __half{x} += y;
273 __half
operator-(
const __half& x,
const __half& y)
275 return __half{x} -= y;
280 __half
operator*(
const __half& x,
const __half& y)
282 return __half{x} *= y;
287 __half
operator/(
const __half& x,
const __half& y)
289 return __half{x} /= y;
294 bool operator==(
const __half& x,
const __half& y)
296 return x.data == y.data;
301 bool operator!=(
const __half& x,
const __half& y)
308 bool operator<(
const __half& x,
const __half& y)
310 return x.data < y.data;
315 bool operator>(
const __half& x,
const __half& y)
317 return y.data < x.data;
322 bool operator<=(
const __half& x,
const __half& y)
329 bool operator>=(
const __half& x,
const __half& y)
342 sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
357 __half2(
decltype(data) xx) : data{xx} {}
359 __half2(
const __half& xx,
const __half& yy)
365 __half2(
const __half2&) =
default;
367 __half2(__half2&&) =
default;
369 ~__half2() =
default;
373 __half2& operator=(
const __half2&) =
default;
375 __half2& operator=(__half2&&) =
default;
384 #if !defined(__HIP_NO_HALF_OPERATORS__)
410 __half2&
operator++() {
return *
this += _Float16_2{1, 1}; }
419 __half2&
operator--() {
return *
this -= _Float16_2{1, 1}; }
431 operator decltype(data)()
const {
return data; }
440 #if !defined(__HIP_NO_HALF_OPERATORS__)
442 __half2
operator+()
const {
return *
this; }
447 tmp.data = -tmp.data;
453 #if !defined(__HIP_NO_HALF_OPERATORS__)
457 __half2
operator+(
const __half2& xx,
const __half2& yy)
459 return __half2{xx} += yy;
464 __half2
operator-(
const __half2& xx,
const __half2& yy)
466 return __half2{xx} -= yy;
471 __half2
operator*(
const __half2& xx,
const __half2& yy)
473 return __half2{xx} *= yy;
478 __half2
operator/(
const __half2& xx,
const __half2& yy)
480 return __half2{xx} /= yy;
485 bool operator==(
const __half2& xx,
const __half2& yy)
487 auto r = xx.data == yy.data;
488 return r.x != 0 && r.y != 0;
493 bool operator!=(
const __half2& xx,
const __half2& yy)
500 bool operator<(
const __half2& xx,
const __half2& yy)
502 auto r = xx.data < yy.data;
503 return r.x != 0 && r.y != 0;
508 bool operator>(
const __half2& xx,
const __half2& yy)
515 bool operator<=(
const __half2& xx,
const __half2& yy)
522 bool operator>=(
const __half2& xx,
const __half2& yy)
534 __half2 make_half2(__half x, __half y)
536 return __half2{x, y};
541 __half __low2half(__half2 x)
548 __half __high2half(__half2 x)
555 __half2 __half2half2(__half x)
557 return __half2{x, x};
562 __half2 __halves2half2(__half x, __half y)
564 return __half2{x, y};
569 __half2 __low2half2(__half2 x)
579 __half2 __high2half2(__half2 x)
589 __half2 __lows2half2(__half2 x, __half2 y)
599 __half2 __highs2half2(__half2 x, __half2 y)
620 short __half_as_short(__half x)
627 unsigned short __half_as_ushort(__half x)
634 __half __short_as_half(
short x)
642 __half __ushort_as_half(
unsigned short x)
651 __half __float2half(
float x)
657 __half __float2half_rn(
float x)
661 #if !defined(__HIPCC_RTC__)
665 __half __float2half_rz(
float x)
671 __half __float2half_rd(
float x)
677 __half __float2half_ru(
float x)
684 __half __float2half_rz(
float x)
690 __half __float2half_rd(
float x)
696 __half __float2half_ru(
float x)
702 __half2 __float2half2_rn(
float x)
706 static_cast<_Float16
>(x),
static_cast<_Float16
>(x)}};
710 __half2 __floats2half2_rn(
float x,
float y)
712 return __half2{_Float16_2{
713 static_cast<_Float16
>(x),
static_cast<_Float16
>(y)}};
717 __half2 __float22half2_rn(
float2 x)
719 return __floats2half2_rn(x.x, x.y);
725 float __half2float(__half x)
743 float2 __half22float2(__half2 x)
753 int __half2int_rn(__half x)
759 int __half2int_rz(__half x)
765 int __half2int_rd(__half x)
771 int __half2int_ru(__half x)
779 __half __int2half_rn(
int x)
785 __half __int2half_rz(
int x)
791 __half __int2half_rd(
int x)
797 __half __int2half_ru(
int x)
805 short __half2short_rn(__half x)
811 short __half2short_rz(__half x)
817 short __half2short_rd(__half x)
823 short __half2short_ru(__half x)
831 __half __short2half_rn(
short x)
837 __half __short2half_rz(
short x)
843 __half __short2half_rd(
short x)
849 __half __short2half_ru(
short x)
857 long long __half2ll_rn(__half x)
863 long long __half2ll_rz(__half x)
869 long long __half2ll_rd(__half x)
875 long long __half2ll_ru(__half x)
883 __half __ll2half_rn(
long long x)
889 __half __ll2half_rz(
long long x)
895 __half __ll2half_rd(
long long x)
901 __half __ll2half_ru(
long long x)
909 unsigned int __half2uint_rn(__half x)
915 unsigned int __half2uint_rz(__half x)
921 unsigned int __half2uint_rd(__half x)
927 unsigned int __half2uint_ru(__half x)
935 __half __uint2half_rn(
unsigned int x)
941 __half __uint2half_rz(
unsigned int x)
947 __half __uint2half_rd(
unsigned int x)
953 __half __uint2half_ru(
unsigned int x)
961 unsigned short __half2ushort_rn(__half x)
967 unsigned short __half2ushort_rz(__half x)
973 unsigned short __half2ushort_rd(__half x)
979 unsigned short __half2ushort_ru(__half x)
987 __half __ushort2half_rn(
unsigned short x)
993 __half __ushort2half_rz(
unsigned short x)
999 __half __ushort2half_rd(
unsigned short x)
1005 __half __ushort2half_ru(
unsigned short x)
1013 unsigned long long __half2ull_rn(__half x)
1019 unsigned long long __half2ull_rz(__half x)
1025 unsigned long long __half2ull_rd(__half x)
1031 unsigned long long __half2ull_ru(__half x)
1039 __half __ull2half_rn(
unsigned long long x)
1045 __half __ull2half_rz(
unsigned long long x)
1051 __half __ull2half_rd(
unsigned long long x)
1057 __half __ull2half_ru(
unsigned long long x)
1065 __half __ldg(
const __half* ptr) {
return *ptr; }
1068 __half __ldcg(
const __half* ptr) {
return *ptr; }
1071 __half __ldca(
const __half* ptr) {
return *ptr; }
1074 __half __ldcs(
const __half* ptr) {
return *ptr; }
1078 __half2 __ldg(
const __half2* ptr) {
return *ptr; }
1081 __half2 __ldcg(
const __half2* ptr) {
return *ptr; }
1084 __half2 __ldca(
const __half2* ptr) {
return *ptr; }
1087 __half2 __ldcs(
const __half2* ptr) {
return *ptr; }
1092 bool __heq(__half x, __half y)
1099 bool __hne(__half x, __half y)
1106 bool __hle(__half x, __half y)
1113 bool __hge(__half x, __half y)
1120 bool __hlt(__half x, __half y)
1127 bool __hgt(__half x, __half y)
1133 bool __hequ(__half x, __half y) {
1138 bool __hneu(__half x, __half y) {
1142 bool __hleu(__half x, __half y) {
1147 bool __hgeu(__half x, __half y) {
1152 bool __hltu(__half x, __half y) {
1157 bool __hgtu(__half x, __half y) {
1163 __half2
__heq2(__half2 x, __half2 y)
1167 return __builtin_convertvector(-r, _Float16_2);
1171 __half2
__hne2(__half2 x, __half2 y)
1175 return __builtin_convertvector(-r, _Float16_2);
1179 __half2
__hle2(__half2 x, __half2 y)
1183 return __builtin_convertvector(-r, _Float16_2);
1187 __half2
__hge2(__half2 x, __half2 y)
1191 return __builtin_convertvector(-r, _Float16_2);
1195 __half2
__hlt2(__half2 x, __half2 y)
1199 return __builtin_convertvector(-r, _Float16_2);
1203 __half2
__hgt2(__half2 x, __half2 y)
1207 return __builtin_convertvector(-r, _Float16_2);
1209 inline __HOST_DEVICE__
1210 __half2 __hequ2(__half2 x, __half2 y) {
1213 return __builtin_convertvector(-r, _Float16_2);
1217 __half2 __hneu2(__half2 x, __half2 y) {
1219 return __builtin_convertvector(-r, _Float16_2);
1223 __half2 __hleu2(__half2 x, __half2 y) {
1225 return __builtin_convertvector(-r, _Float16_2);
1229 __half2 __hgeu2(__half2 x, __half2 y) {
1231 return __builtin_convertvector(-r, _Float16_2);
1235 __half2 __hltu2(__half2 x, __half2 y) {
1237 return __builtin_convertvector(-r, _Float16_2);
1241 __half2 __hgtu2(__half2 x, __half2 y) {
1243 return __builtin_convertvector(-r, _Float16_2);
1248 bool __hbeq2(__half2 x, __half2 y)
1251 return r.data.x != 0 && r.data.y != 0;
1255 bool __hbne2(__half2 x, __half2 y)
1258 return r.data.x != 0 && r.data.y != 0;
1262 bool __hble2(__half2 x, __half2 y)
1265 return r.data.x != 0 && r.data.y != 0;
1269 bool __hbge2(__half2 x, __half2 y)
1272 return r.data.x != 0 && r.data.y != 0;
1276 bool __hblt2(__half2 x, __half2 y)
1279 return r.data.x != 0 && r.data.y != 0;
1283 bool __hbgt2(__half2 x, __half2 y)
1286 return r.data.x != 0 && r.data.y != 0;
1308 __half
__hmax(
const __half x,
const __half y) {
1314 __half __hmax_nan(
const __half x,
const __half y) {
1315 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1317 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1324 __half
__hmin(
const __half x,
const __half y) {
1330 __half __hmin_nan(
const __half x,
const __half y) {
1331 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1333 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1342 __half __clamp_01(__half x)
1353 __half
__hadd(__half x, __half y)
1364 __ocml_fabs_f16(
static_cast<__half_raw>(x).data)};
1368 __half
__hsub(__half x, __half y)
1376 __half
__hmul(__half x, __half y)
1384 __half __hadd_sat(__half x, __half y)
1386 return __clamp_01(
__hadd(x, y));
1390 __half __hsub_sat(__half x, __half y)
1392 return __clamp_01(
__hsub(x, y));
1396 __half __hmul_sat(__half x, __half y)
1398 return __clamp_01(
__hmul(x, y));
1402 __half
__hfma(__half x, __half y, __half z)
1411 __half __hfma_sat(__half x, __half y, __half z)
1413 return __clamp_01(
__hfma(x, y, z));
1417 __half
__hdiv(__half x, __half y)
1426 __half2
__hadd2(__half2 x, __half2 y)
1437 __ocml_fabs_2f16(
static_cast<__half2_raw>(x).data)};
1441 __half2
__hsub2(__half2 x, __half2 y)
1449 __half2
__hmul2(__half2 x, __half2 y)
1457 __half2 __hadd2_sat(__half2 x, __half2 y)
1466 __half2 __hsub2_sat(__half2 x, __half2 y)
1475 __half2 __hmul2_sat(__half2 x, __half2 y)
1484 __half2
__hfma2(__half2 x, __half2 y, __half2 z)
1486 return __half2{__ocml_fma_2f16(x, y, z)};
1490 __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1499 __half2
__h2div(__half2 x, __half2 y)
1507 #if defined(__clang__) && defined(__HIP__)
1510 float amd_mixed_dot(__half2 a, __half2 b,
float c,
bool saturate) {
1511 return __ockl_fdot2(
static_cast<__half2_raw>(a).data,
1521 __ocml_trunc_f16(
static_cast<__half_raw>(x).data)};
1525 __half
hceil(__half x)
1528 __ocml_ceil_f16(
static_cast<__half_raw>(x).data)};
1535 __ocml_floor_f16(
static_cast<__half_raw>(x).data)};
1539 __half
hrint(__half x)
1542 __ocml_rint_f16(
static_cast<__half_raw>(x).data)};
1546 __half
hsin(__half x)
1549 __ocml_sin_f16(
static_cast<__half_raw>(x).data)};
1553 __half
hcos(__half x)
1556 __ocml_cos_f16(
static_cast<__half_raw>(x).data)};
1560 __half
hexp(__half x)
1563 __ocml_exp_f16(
static_cast<__half_raw>(x).data)};
1567 __half
hexp2(__half x)
1570 __ocml_exp2_f16(
static_cast<__half_raw>(x).data)};
1577 __ocml_exp10_f16(
static_cast<__half_raw>(x).data)};
1581 __half
hlog2(__half x)
1584 __ocml_log2_f16(
static_cast<__half_raw>(x).data)};
1588 __half
hlog(__half x)
1591 __ocml_log_f16(
static_cast<__half_raw>(x).data)};
1598 __ocml_log10_f16(
static_cast<__half_raw>(x).data)};
1602 __half
hrcp(__half x)
1605 static_cast<_Float16
>(1.0f) /
static_cast<__half_raw>(x).data};
1612 __ocml_rsqrt_f16(
static_cast<__half_raw>(x).data)};
1616 __half
hsqrt(__half x)
1619 __ocml_sqrt_f16(
static_cast<__half_raw>(x).data)};
1625 return __ocml_isinf_f16(
static_cast<__half_raw>(x).data);
1631 return __ocml_isnan_f16(
static_cast<__half_raw>(x).data);
1644 return __half2{__ocml_trunc_2f16(x)};
1648 __half2
h2ceil(__half2 x)
1650 return __half2{__ocml_ceil_2f16(x)};
1656 return __half2{__ocml_floor_2f16(x)};
1660 __half2
h2rint(__half2 x)
1662 return __half2{__ocml_rint_2f16(x)};
1666 __half2
h2sin(__half2 x)
1668 return __half2{__ocml_sin_2f16(x)};
1672 __half2
h2cos(__half2 x)
1674 return __half2{__ocml_cos_2f16(x)};
1678 __half2
h2exp(__half2 x)
1680 return __half2{__ocml_exp_2f16(x)};
1684 __half2
h2exp2(__half2 x)
1686 return __half2{__ocml_exp2_2f16(x)};
1692 return __half2{__ocml_exp10_2f16(x)};
1696 __half2
h2log2(__half2 x)
1698 return __half2{__ocml_log2_2f16(x)};
1702 __half2
h2log(__half2 x) {
return __ocml_log_2f16(x); }
1705 __half2
h2log10(__half2 x) {
return __ocml_log10_2f16(x); }
1708 __half2
h2rcp(__half2 x) {
1710 _Float16_2{
static_cast<_Float16
>(1.0f),
static_cast<_Float16
>(1.0f)} / x.data};
1714 __half2
h2rsqrt(__half2 x) {
return __ocml_rsqrt_2f16(x); }
1717 __half2
h2sqrt(__half2 x) {
return __ocml_sqrt_2f16(x); }
1720 __half2 __hisinf2(__half2 x)
1722 auto r = __ocml_isinf_2f16(x);
1723 return __half2{_Float16_2{
1724 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1730 auto r = __ocml_isnan_2f16(x);
1731 return __half2{_Float16_2{
1732 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1738 return __half2{-
static_cast<__half2_raw>(x).data};
1742 #if !defined(HIP_NO_HALF)
1743 using half = __half;
1744 using half2 = __half2;
1748 __half __shfl(__half var,
int src_lane,
int width = warpSize) {
1749 union {
int i; __half h; } tmp; tmp.h = var;
1750 tmp.i = __shfl(tmp.i, src_lane, width);
1755 __half2 __shfl(__half2 var,
int src_lane,
int width = warpSize) {
1756 union {
int i; __half2 h; } tmp; tmp.h = var;
1757 tmp.i = __shfl(tmp.i, src_lane, width);
1762 __half __shfl_up(__half var,
unsigned int lane_delta,
int width = warpSize) {
1763 union {
int i; __half h; } tmp; tmp.h = var;
1764 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1769 __half2 __shfl_up(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1770 union {
int i; __half2 h; } tmp; tmp.h = var;
1771 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1776 __half __shfl_down(__half var,
unsigned int lane_delta,
int width = warpSize) {
1777 union {
int i; __half h; } tmp; tmp.h = var;
1778 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1783 __half2 __shfl_down(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1784 union {
int i; __half2 h; } tmp; tmp.h = var;
1785 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1790 __half __shfl_xor(__half var,
int lane_mask,
int width = warpSize) {
1791 union {
int i; __half h; } tmp; tmp.h = var;
1792 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1797 __half2 __shfl_xor(__half2 var,
int lane_mask,
int width = warpSize) {
1798 union {
int i; __half2 h; } tmp; tmp.h = var;
1799 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1803#elif defined(__GNUC__)
1804 #if !defined(__HIPCC_RTC__)
1805 #include "hip_fp16_gcc.h"
#define __host__
Definition host_defines.h:170
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:681
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:835
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:921
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:850
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:715
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:673
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:817
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:697
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:826
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:707
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:865
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:903
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:725
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:884
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:689
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - equal.
Definition amd_hip_bf16.h:1067
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - greater than.
Definition amd_hip_bf16.h:1084
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - not equal.
Definition amd_hip_bf16.h:1116
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1463
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - less than equal.
Definition amd_hip_bf16.h:1172
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1471
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1495
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - return min.
Definition amd_hip_bf16.h:1144
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - greater than equal.
Definition amd_hip_bf16.h:1100
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered less than equal.
Definition amd_hip_bf16.h:1180
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - return max.
Definition amd_hip_bf16.h:1132
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1487
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered not equal.
Definition amd_hip_bf16.h:1124
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered greater than.
Definition amd_hip_bf16.h:1092
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1479
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered less than.
Definition amd_hip_bf16.h:1164
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered greater than equal.
Definition amd_hip_bf16.h:1108
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - less than.
Definition amd_hip_bf16.h:1156
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1503
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:1197
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered equal.
Definition amd_hip_bf16.h:1075
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:1239
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:1250
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:1272
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:1294
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:1283
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:1448
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1354
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1316
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if less than, otherwise 0.0.
Definition amd_hip_bf16.h:1409
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:1305
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:1217
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:1206
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if less than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1394
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b - unordered.
Definition amd_hip_bf16.h:1328
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:1261
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a == b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:1339
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than, otherwise 0.0.
Definition amd_hip_bf16.h:1369
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:1384
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:749
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:784
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:770
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:758
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Negates a bfloat162 value.
Definition amd_hip_bf16.h:796
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:735
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:628
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:590
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:637
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1588
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1612
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1596
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1564
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1636
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1668
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1620
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1660
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1604
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1644
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1676
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1652
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1572
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1628
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1580
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1765
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1702
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1693
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1792
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1756
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1684
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1729
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1711
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1720
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1747
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1783
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1774
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1801
Definition amd_hip_vector_types.h:2035
Definition hip_fp16_gcc.h:7
Definition hip_fp16_gcc.h:11