HIP: Heterogenous-computing Interface for Portability
amd_hip_fp16.h
1 /*
2 Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #pragma once
24 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
25 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
26 
27 #if defined(__HIPCC_RTC__)
28  #define __HOST_DEVICE__ __device__
29 #else
30  #define __HOST_DEVICE__ __host__ __device__
31  #include <hip/amd_detail/amd_hip_common.h>
33  #include <assert.h>
34  #if defined(__cplusplus)
35  #include <algorithm>
36  #include <type_traits>
37  #include <utility>
38 #endif
39 #endif // !defined(__HIPCC_RTC__)
40 
41 #if defined(__clang__) && defined(__HIP__)
42  typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
43 
44  struct __half_raw {
45  union {
46  static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
47 
48  _Float16 data;
49  unsigned short x;
50  };
51  };
52 
53  struct __half2_raw {
54  union {
55  static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
56 
57  struct {
58  __half_raw x;
59  __half_raw y;
60  };
61  _Float16_2 data;
62  };
63  };
64 
65  #if defined(__cplusplus)
66  #if !defined(__HIPCC_RTC__)
67  #include "hip_fp16_math_fwd.h"
68  #include "amd_hip_vector_types.h"
69  #include "host_defines.h"
70  #include "amd_device_functions.h"
71  #include "amd_warp_functions.h"
72  #endif
73  namespace std
74  {
75  template<> struct is_floating_point<_Float16> : std::true_type {};
76  }
77 
78  template<bool cond, typename T = void>
79  using Enable_if_t = typename std::enable_if<cond, T>::type;
80 
81  // BEGIN STRUCT __HALF
82  struct __half {
83  protected:
84  union {
85  static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
86 
87  _Float16 data;
88  unsigned short __x;
89  };
90  public:
91  // CREATORS
92  __HOST_DEVICE__
93  __half() = default;
94  __HOST_DEVICE__
95  __half(const __half_raw& x) : data{x.data} {}
96  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
97  __HOST_DEVICE__
98  __half(decltype(data) x) : data{x} {}
99  template<
100  typename T,
101  Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
102  __HOST_DEVICE__
103  __half(T x) : data{static_cast<_Float16>(x)} {}
104  #endif
105  __HOST_DEVICE__
106  __half(const __half&) = default;
107  __HOST_DEVICE__
108  __half(__half&&) = default;
109  __HOST_DEVICE__
110  ~__half() = default;
111 
112  // CREATORS - DEVICE ONLY
113  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
114  template<
115  typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
116  __HOST_DEVICE__
117  __half(T x) : data{static_cast<_Float16>(x)} {}
118  #endif
119 
120  // MANIPULATORS
121  __HOST_DEVICE__
122  __half& operator=(const __half&) = default;
123  __HOST_DEVICE__
124  __half& operator=(__half&&) = default;
125  __HOST_DEVICE__
126  __half& operator=(const __half_raw& x)
127  {
128  data = x.data;
129  return *this;
130  }
131  __HOST_DEVICE__
132  volatile __half& operator=(const __half_raw& x) volatile
133  {
134  data = x.data;
135  return *this;
136  }
137  volatile __half& operator=(const volatile __half_raw& x) volatile
138  {
139  data = x.data;
140  return *this;
141  }
142  __half& operator=(__half_raw&& x)
143  {
144  data = x.data;
145  return *this;
146  }
147  volatile __half& operator=(__half_raw&& x) volatile
148  {
149  data = x.data;
150  return *this;
151  }
152  volatile __half& operator=(volatile __half_raw&& x) volatile
153  {
154  data = x.data;
155  return *this;
156  }
157  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
158  template<
159  typename T,
160  Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
161  __HOST_DEVICE__
162  __half& operator=(T x)
163  {
164  data = static_cast<_Float16>(x);
165  return *this;
166  }
167  #endif
168 
169  // MANIPULATORS - DEVICE ONLY
170  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
171  template<
172  typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
173  __device__
174  __half& operator=(T x)
175  {
176  data = static_cast<_Float16>(x);
177  return *this;
178  }
179  #endif
180 
181  #if !defined(__HIP_NO_HALF_OPERATORS__)
182  __device__
183  __half& operator+=(const __half& x)
184  {
185  data += x.data;
186  return *this;
187  }
188  __device__
189  __half& operator-=(const __half& x)
190  {
191  data -= x.data;
192  return *this;
193  }
194  __device__
195  __half& operator*=(const __half& x)
196  {
197  data *= x.data;
198  return *this;
199  }
200  __device__
201  __half& operator/=(const __half& x)
202  {
203  data /= x.data;
204  return *this;
205  }
206  __device__
207  __half& operator++() { ++data; return *this; }
208  __device__
209  __half operator++(int)
210  {
211  __half tmp{*this};
212  ++*this;
213  return tmp;
214  }
215  __device__
216  __half& operator--() { --data; return *this; }
217  __device__
218  __half operator--(int)
219  {
220  __half tmp{*this};
221  --*this;
222  return tmp;
223  }
224  #endif
225 
226  // ACCESSORS
227  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
228  template<
229  typename T,
230  Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
231  __HOST_DEVICE__
232  operator T() const { return data; }
233  #endif
234  __HOST_DEVICE__
235  operator __half_raw() const { return __half_raw{data}; }
236  __HOST_DEVICE__
237  operator __half_raw() const volatile
238  {
239  return __half_raw{data};
240  }
241 
242  #if !defined(__HIP_NO_HALF_CONVERSIONS__)
243  template<
244  typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
245  __HOST_DEVICE__
246  operator T() const { return data; }
247  #endif
248 
249  #if !defined(__HIP_NO_HALF_OPERATORS__)
250  __device__
251  __half operator+() const { return *this; }
252  __device__
253  __half operator-() const
254  {
255  __half tmp{*this};
256  tmp.data = -tmp.data;
257  return tmp;
258  }
259  #endif
260 
261  // FRIENDS
262  #if !defined(__HIP_NO_HALF_OPERATORS__)
263  friend
264  inline
265  __device__
266  __half operator+(const __half& x, const __half& y)
267  {
268  return __half{x} += y;
269  }
270  friend
271  inline
272  __device__
273  __half operator-(const __half& x, const __half& y)
274  {
275  return __half{x} -= y;
276  }
277  friend
278  inline
279  __device__
280  __half operator*(const __half& x, const __half& y)
281  {
282  return __half{x} *= y;
283  }
284  friend
285  inline
286  __device__
287  __half operator/(const __half& x, const __half& y)
288  {
289  return __half{x} /= y;
290  }
291  friend
292  inline
293  __device__
294  bool operator==(const __half& x, const __half& y)
295  {
296  return x.data == y.data;
297  }
298  friend
299  inline
300  __device__
301  bool operator!=(const __half& x, const __half& y)
302  {
303  return !(x == y);
304  }
305  friend
306  inline
307  __device__
308  bool operator<(const __half& x, const __half& y)
309  {
310  return x.data < y.data;
311  }
312  friend
313  inline
314  __device__
315  bool operator>(const __half& x, const __half& y)
316  {
317  return y.data < x.data;
318  }
319  friend
320  inline
321  __device__
322  bool operator<=(const __half& x, const __half& y)
323  {
324  return !(y < x);
325  }
326  friend
327  inline
328  __device__
329  bool operator>=(const __half& x, const __half& y)
330  {
331  return !(x < y);
332  }
333  #endif // !defined(__HIP_NO_HALF_OPERATORS__)
334  };
335  // END STRUCT __HALF
336 
337  // BEGIN STRUCT __HALF2
338  struct __half2 {
339  public:
340  union {
341  static_assert(
342  sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
343 
344  struct {
345  __half x;
346  __half y;
347  };
348  _Float16_2 data;
349  };
350 
351  // CREATORS
352  __HOST_DEVICE__
353  __half2() = default;
354  __HOST_DEVICE__
355  __half2(const __half2_raw& xx) : data{xx.data} {}
356  __HOST_DEVICE__
357  __half2(decltype(data) xx) : data{xx} {}
358  __HOST_DEVICE__
359  __half2(const __half& xx, const __half& yy)
360  :
361  data{static_cast<__half_raw>(xx).data,
362  static_cast<__half_raw>(yy).data}
363  {}
364  __HOST_DEVICE__
365  __half2(const __half2&) = default;
366  __HOST_DEVICE__
367  __half2(__half2&&) = default;
368  __HOST_DEVICE__
369  ~__half2() = default;
370 
371  // MANIPULATORS
372  __HOST_DEVICE__
373  __half2& operator=(const __half2&) = default;
374  __HOST_DEVICE__
375  __half2& operator=(__half2&&) = default;
376  __HOST_DEVICE__
377  __half2& operator=(const __half2_raw& xx)
378  {
379  data = xx.data;
380  return *this;
381  }
382 
383  // MANIPULATORS - DEVICE ONLY
384  #if !defined(__HIP_NO_HALF_OPERATORS__)
385  __device__
386  __half2& operator+=(const __half2& xx)
387  {
388  data += xx.data;
389  return *this;
390  }
391  __device__
392  __half2& operator-=(const __half2& xx)
393  {
394  data -= xx.data;
395  return *this;
396  }
397  __device__
398  __half2& operator*=(const __half2& xx)
399  {
400  data *= xx.data;
401  return *this;
402  }
403  __device__
404  __half2& operator/=(const __half2& xx)
405  {
406  data /= xx.data;
407  return *this;
408  }
409  __device__
410  __half2& operator++() { return *this += _Float16_2{1, 1}; }
411  __device__
412  __half2 operator++(int)
413  {
414  __half2 tmp{*this};
415  ++*this;
416  return tmp;
417  }
418  __device__
419  __half2& operator--() { return *this -= _Float16_2{1, 1}; }
420  __device__
421  __half2 operator--(int)
422  {
423  __half2 tmp{*this};
424  --*this;
425  return tmp;
426  }
427  #endif
428 
429  // ACCESSORS
430  __HOST_DEVICE__
431  operator decltype(data)() const { return data; }
432  __HOST_DEVICE__
433  operator __half2_raw() const {
434  __half2_raw r;
435  r.data = data;
436  return r;
437  }
438 
439  // ACCESSORS - DEVICE ONLY
440  #if !defined(__HIP_NO_HALF_OPERATORS__)
441  __device__
442  __half2 operator+() const { return *this; }
443  __device__
444  __half2 operator-() const
445  {
446  __half2 tmp{*this};
447  tmp.data = -tmp.data;
448  return tmp;
449  }
450  #endif
451 
452  // FRIENDS
453  #if !defined(__HIP_NO_HALF_OPERATORS__)
454  friend
455  inline
456  __device__
457  __half2 operator+(const __half2& xx, const __half2& yy)
458  {
459  return __half2{xx} += yy;
460  }
461  friend
462  inline
463  __device__
464  __half2 operator-(const __half2& xx, const __half2& yy)
465  {
466  return __half2{xx} -= yy;
467  }
468  friend
469  inline
470  __device__
471  __half2 operator*(const __half2& xx, const __half2& yy)
472  {
473  return __half2{xx} *= yy;
474  }
475  friend
476  inline
477  __device__
478  __half2 operator/(const __half2& xx, const __half2& yy)
479  {
480  return __half2{xx} /= yy;
481  }
482  friend
483  inline
484  __device__
485  bool operator==(const __half2& xx, const __half2& yy)
486  {
487  auto r = xx.data == yy.data;
488  return r.x != 0 && r.y != 0;
489  }
490  friend
491  inline
492  __device__
493  bool operator!=(const __half2& xx, const __half2& yy)
494  {
495  return !(xx == yy);
496  }
497  friend
498  inline
499  __device__
500  bool operator<(const __half2& xx, const __half2& yy)
501  {
502  auto r = xx.data < yy.data;
503  return r.x != 0 && r.y != 0;
504  }
505  friend
506  inline
507  __device__
508  bool operator>(const __half2& xx, const __half2& yy)
509  {
510  return yy < xx;
511  }
512  friend
513  inline
514  __device__
515  bool operator<=(const __half2& xx, const __half2& yy)
516  {
517  return !(yy < xx);
518  }
519  friend
520  inline
521  __device__
522  bool operator>=(const __half2& xx, const __half2& yy)
523  {
524  return !(xx < yy);
525  }
526  #endif // !defined(__HIP_NO_HALF_OPERATORS__)
527  };
528  // END STRUCT __HALF2
529 
530  namespace
531  {
532  inline
533  __HOST_DEVICE__
534  __half2 make_half2(__half x, __half y)
535  {
536  return __half2{x, y};
537  }
538 
539  inline
540  __HOST_DEVICE__
541  __half __low2half(__half2 x)
542  {
543  return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
544  }
545 
546  inline
547  __HOST_DEVICE__
548  __half __high2half(__half2 x)
549  {
550  return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
551  }
552 
553  inline
554  __HOST_DEVICE__
555  __half2 __half2half2(__half x)
556  {
557  return __half2{x, x};
558  }
559 
560  inline
561  __HOST_DEVICE__
562  __half2 __halves2half2(__half x, __half y)
563  {
564  return __half2{x, y};
565  }
566 
567  inline
568  __HOST_DEVICE__
569  __half2 __low2half2(__half2 x)
570  {
571  return __half2{
572  _Float16_2{
573  static_cast<__half2_raw>(x).data.x,
574  static_cast<__half2_raw>(x).data.x}};
575  }
576 
577  inline
578  __HOST_DEVICE__
579  __half2 __high2half2(__half2 x)
580  {
581  return __half2{
582  _Float16_2{
583  static_cast<__half2_raw>(x).data.y,
584  static_cast<__half2_raw>(x).data.y}};
585  }
586 
587  inline
588  __HOST_DEVICE__
589  __half2 __lows2half2(__half2 x, __half2 y)
590  {
591  return __half2{
592  _Float16_2{
593  static_cast<__half2_raw>(x).data.x,
594  static_cast<__half2_raw>(y).data.x}};
595  }
596 
597  inline
598  __HOST_DEVICE__
599  __half2 __highs2half2(__half2 x, __half2 y)
600  {
601  return __half2{
602  _Float16_2{
603  static_cast<__half2_raw>(x).data.y,
604  static_cast<__half2_raw>(y).data.y}};
605  }
606 
607  inline
608  __HOST_DEVICE__
609  __half2 __lowhigh2highlow(__half2 x)
610  {
611  return __half2{
612  _Float16_2{
613  static_cast<__half2_raw>(x).data.y,
614  static_cast<__half2_raw>(x).data.x}};
615  }
616 
617  // Bitcasts
618  inline
619  __device__
620  short __half_as_short(__half x)
621  {
622  return static_cast<__half_raw>(x).x;
623  }
624 
625  inline
626  __device__
627  unsigned short __half_as_ushort(__half x)
628  {
629  return static_cast<__half_raw>(x).x;
630  }
631 
632  inline
633  __device__
634  __half __short_as_half(short x)
635  {
636  __half_raw r; r.x = x;
637  return r;
638  }
639 
640  inline
641  __device__
642  __half __ushort_as_half(unsigned short x)
643  {
644  __half_raw r; r.x = x;
645  return r;
646  }
647 
648  // float -> half | half2
649  inline
650  __HOST_DEVICE__
651  __half __float2half(float x)
652  {
653  return __half_raw{static_cast<_Float16>(x)};
654  }
655  inline
656  __HOST_DEVICE__
657  __half __float2half_rn(float x)
658  {
659  return __half_raw{static_cast<_Float16>(x)};
660  }
661  #if !defined(__HIPCC_RTC__)
662  // TODO: rounding behaviour is not correct for host functions.
663  inline
664  __host__
665  __half __float2half_rz(float x)
666  {
667  return __half_raw{static_cast<_Float16>(x)};
668  }
669  inline
670  __host__
671  __half __float2half_rd(float x)
672  {
673  return __half_raw{static_cast<_Float16>(x)};
674  }
675  inline
676  __host__
677  __half __float2half_ru(float x)
678  {
679  return __half_raw{static_cast<_Float16>(x)};
680  }
681  #endif
682  inline
683  __device__
684  __half __float2half_rz(float x)
685  {
686  return __half_raw{__ocml_cvtrtz_f16_f32(x)};
687  }
688  inline
689  __device__
690  __half __float2half_rd(float x)
691  {
692  return __half_raw{__ocml_cvtrtn_f16_f32(x)};
693  }
694  inline
695  __device__
696  __half __float2half_ru(float x)
697  {
698  return __half_raw{__ocml_cvtrtp_f16_f32(x)};
699  }
700  inline
701  __HOST_DEVICE__
702  __half2 __float2half2_rn(float x)
703  {
704  return __half2{
705  _Float16_2{
706  static_cast<_Float16>(x), static_cast<_Float16>(x)}};
707  }
708  inline
709  __HOST_DEVICE__
710  __half2 __floats2half2_rn(float x, float y)
711  {
712  return __half2{_Float16_2{
713  static_cast<_Float16>(x), static_cast<_Float16>(y)}};
714  }
715  inline
716  __HOST_DEVICE__
717  __half2 __float22half2_rn(float2 x)
718  {
719  return __floats2half2_rn(x.x, x.y);
720  }
721 
722  // half | half2 -> float
723  inline
724  __HOST_DEVICE__
725  float __half2float(__half x)
726  {
727  return static_cast<__half_raw>(x).data;
728  }
729  inline
730  __HOST_DEVICE__
731  float __low2float(__half2 x)
732  {
733  return static_cast<__half2_raw>(x).data.x;
734  }
735  inline
736  __HOST_DEVICE__
737  float __high2float(__half2 x)
738  {
739  return static_cast<__half2_raw>(x).data.y;
740  }
741  inline
742  __HOST_DEVICE__
743  float2 __half22float2(__half2 x)
744  {
745  return make_float2(
746  static_cast<__half2_raw>(x).data.x,
747  static_cast<__half2_raw>(x).data.y);
748  }
749 
750  // half -> int
751  inline
752  __device__
753  int __half2int_rn(__half x)
754  {
755  return static_cast<__half_raw>(x).data;
756  }
757  inline
758  __device__
759  int __half2int_rz(__half x)
760  {
761  return static_cast<__half_raw>(x).data;
762  }
763  inline
764  __device__
765  int __half2int_rd(__half x)
766  {
767  return static_cast<__half_raw>(x).data;
768  }
769  inline
770  __device__
771  int __half2int_ru(__half x)
772  {
773  return static_cast<__half_raw>(x).data;
774  }
775 
776  // int -> half
777  inline
778  __device__
779  __half __int2half_rn(int x)
780  {
781  return __half_raw{static_cast<_Float16>(x)};
782  }
783  inline
784  __device__
785  __half __int2half_rz(int x)
786  {
787  return __half_raw{static_cast<_Float16>(x)};
788  }
789  inline
790  __device__
791  __half __int2half_rd(int x)
792  {
793  return __half_raw{static_cast<_Float16>(x)};
794  }
795  inline
796  __device__
797  __half __int2half_ru(int x)
798  {
799  return __half_raw{static_cast<_Float16>(x)};
800  }
801 
802  // half -> short
803  inline
804  __device__
805  short __half2short_rn(__half x)
806  {
807  return static_cast<__half_raw>(x).data;
808  }
809  inline
810  __device__
811  short __half2short_rz(__half x)
812  {
813  return static_cast<__half_raw>(x).data;
814  }
815  inline
816  __device__
817  short __half2short_rd(__half x)
818  {
819  return static_cast<__half_raw>(x).data;
820  }
821  inline
822  __device__
823  short __half2short_ru(__half x)
824  {
825  return static_cast<__half_raw>(x).data;
826  }
827 
828  // short -> half
829  inline
830  __device__
831  __half __short2half_rn(short x)
832  {
833  return __half_raw{static_cast<_Float16>(x)};
834  }
835  inline
836  __device__
837  __half __short2half_rz(short x)
838  {
839  return __half_raw{static_cast<_Float16>(x)};
840  }
841  inline
842  __device__
843  __half __short2half_rd(short x)
844  {
845  return __half_raw{static_cast<_Float16>(x)};
846  }
847  inline
848  __device__
849  __half __short2half_ru(short x)
850  {
851  return __half_raw{static_cast<_Float16>(x)};
852  }
853 
854  // half -> long long
855  inline
856  __device__
857  long long __half2ll_rn(__half x)
858  {
859  return static_cast<__half_raw>(x).data;
860  }
861  inline
862  __device__
863  long long __half2ll_rz(__half x)
864  {
865  return static_cast<__half_raw>(x).data;
866  }
867  inline
868  __device__
869  long long __half2ll_rd(__half x)
870  {
871  return static_cast<__half_raw>(x).data;
872  }
873  inline
874  __device__
875  long long __half2ll_ru(__half x)
876  {
877  return static_cast<__half_raw>(x).data;
878  }
879 
880  // long long -> half
881  inline
882  __device__
883  __half __ll2half_rn(long long x)
884  {
885  return __half_raw{static_cast<_Float16>(x)};
886  }
887  inline
888  __device__
889  __half __ll2half_rz(long long x)
890  {
891  return __half_raw{static_cast<_Float16>(x)};
892  }
893  inline
894  __device__
895  __half __ll2half_rd(long long x)
896  {
897  return __half_raw{static_cast<_Float16>(x)};
898  }
899  inline
900  __device__
901  __half __ll2half_ru(long long x)
902  {
903  return __half_raw{static_cast<_Float16>(x)};
904  }
905 
906  // half -> unsigned int
907  inline
908  __device__
909  unsigned int __half2uint_rn(__half x)
910  {
911  return static_cast<__half_raw>(x).data;
912  }
913  inline
914  __device__
915  unsigned int __half2uint_rz(__half x)
916  {
917  return static_cast<__half_raw>(x).data;
918  }
919  inline
920  __device__
921  unsigned int __half2uint_rd(__half x)
922  {
923  return static_cast<__half_raw>(x).data;
924  }
925  inline
926  __device__
927  unsigned int __half2uint_ru(__half x)
928  {
929  return static_cast<__half_raw>(x).data;
930  }
931 
932  // unsigned int -> half
933  inline
934  __device__
935  __half __uint2half_rn(unsigned int x)
936  {
937  return __half_raw{static_cast<_Float16>(x)};
938  }
939  inline
940  __device__
941  __half __uint2half_rz(unsigned int x)
942  {
943  return __half_raw{static_cast<_Float16>(x)};
944  }
945  inline
946  __device__
947  __half __uint2half_rd(unsigned int x)
948  {
949  return __half_raw{static_cast<_Float16>(x)};
950  }
951  inline
952  __device__
953  __half __uint2half_ru(unsigned int x)
954  {
955  return __half_raw{static_cast<_Float16>(x)};
956  }
957 
958  // half -> unsigned short
959  inline
960  __device__
961  unsigned short __half2ushort_rn(__half x)
962  {
963  return static_cast<__half_raw>(x).data;
964  }
965  inline
966  __device__
967  unsigned short __half2ushort_rz(__half x)
968  {
969  return static_cast<__half_raw>(x).data;
970  }
971  inline
972  __device__
973  unsigned short __half2ushort_rd(__half x)
974  {
975  return static_cast<__half_raw>(x).data;
976  }
977  inline
978  __device__
979  unsigned short __half2ushort_ru(__half x)
980  {
981  return static_cast<__half_raw>(x).data;
982  }
983 
984  // unsigned short -> half
985  inline
986  __device__
987  __half __ushort2half_rn(unsigned short x)
988  {
989  return __half_raw{static_cast<_Float16>(x)};
990  }
991  inline
992  __device__
993  __half __ushort2half_rz(unsigned short x)
994  {
995  return __half_raw{static_cast<_Float16>(x)};
996  }
997  inline
998  __device__
999  __half __ushort2half_rd(unsigned short x)
1000  {
1001  return __half_raw{static_cast<_Float16>(x)};
1002  }
1003  inline
1004  __device__
1005  __half __ushort2half_ru(unsigned short x)
1006  {
1007  return __half_raw{static_cast<_Float16>(x)};
1008  }
1009 
1010  // half -> unsigned long long
1011  inline
1012  __device__
1013  unsigned long long __half2ull_rn(__half x)
1014  {
1015  return static_cast<__half_raw>(x).data;
1016  }
1017  inline
1018  __device__
1019  unsigned long long __half2ull_rz(__half x)
1020  {
1021  return static_cast<__half_raw>(x).data;
1022  }
1023  inline
1024  __device__
1025  unsigned long long __half2ull_rd(__half x)
1026  {
1027  return static_cast<__half_raw>(x).data;
1028  }
1029  inline
1030  __device__
1031  unsigned long long __half2ull_ru(__half x)
1032  {
1033  return static_cast<__half_raw>(x).data;
1034  }
1035 
1036  // unsigned long long -> half
1037  inline
1038  __device__
1039  __half __ull2half_rn(unsigned long long x)
1040  {
1041  return __half_raw{static_cast<_Float16>(x)};
1042  }
1043  inline
1044  __device__
1045  __half __ull2half_rz(unsigned long long x)
1046  {
1047  return __half_raw{static_cast<_Float16>(x)};
1048  }
1049  inline
1050  __device__
1051  __half __ull2half_rd(unsigned long long x)
1052  {
1053  return __half_raw{static_cast<_Float16>(x)};
1054  }
1055  inline
1056  __device__
1057  __half __ull2half_ru(unsigned long long x)
1058  {
1059  return __half_raw{static_cast<_Float16>(x)};
1060  }
1061 
1062  // Load primitives
1063  inline
1064  __device__
1065  __half __ldg(const __half* ptr) { return *ptr; }
1066  inline
1067  __device__
1068  __half __ldcg(const __half* ptr) { return *ptr; }
1069  inline
1070  __device__
1071  __half __ldca(const __half* ptr) { return *ptr; }
1072  inline
1073  __device__
1074  __half __ldcs(const __half* ptr) { return *ptr; }
1075 
1076  inline
1077  __HOST_DEVICE__
1078  __half2 __ldg(const __half2* ptr) { return *ptr; }
1079  inline
1080  __HOST_DEVICE__
1081  __half2 __ldcg(const __half2* ptr) { return *ptr; }
1082  inline
1083  __HOST_DEVICE__
1084  __half2 __ldca(const __half2* ptr) { return *ptr; }
1085  inline
1086  __HOST_DEVICE__
1087  __half2 __ldcs(const __half2* ptr) { return *ptr; }
1088 
1089  // Relations
1090  inline
1091  __device__
1092  bool __heq(__half x, __half y)
1093  {
1094  return static_cast<__half_raw>(x).data ==
1095  static_cast<__half_raw>(y).data;
1096  }
1097  inline
1098  __device__
1099  bool __hne(__half x, __half y)
1100  {
1101  return static_cast<__half_raw>(x).data !=
1102  static_cast<__half_raw>(y).data;
1103  }
1104  inline
1105  __device__
1106  bool __hle(__half x, __half y)
1107  {
1108  return static_cast<__half_raw>(x).data <=
1109  static_cast<__half_raw>(y).data;
1110  }
1111  inline
1112  __device__
1113  bool __hge(__half x, __half y)
1114  {
1115  return static_cast<__half_raw>(x).data >=
1116  static_cast<__half_raw>(y).data;
1117  }
1118  inline
1119  __device__
1120  bool __hlt(__half x, __half y)
1121  {
1122  return static_cast<__half_raw>(x).data <
1123  static_cast<__half_raw>(y).data;
1124  }
1125  inline
1126  __device__
1127  bool __hgt(__half x, __half y)
1128  {
1129  return static_cast<__half_raw>(x).data >
1130  static_cast<__half_raw>(y).data;
1131  }
1132  inline __device__
1133  bool __hequ(__half x, __half y) {
1134  return !(static_cast<__half_raw>(x).data < static_cast<__half_raw>(y).data) &&
1135  !(static_cast<__half_raw>(x).data > static_cast<__half_raw>(y).data);
1136  }
1137  inline __device__
1138  bool __hneu(__half x, __half y) {
1139  return !(static_cast<__half_raw>(x).data == static_cast<__half_raw>(y).data);
1140  }
1141  inline __device__
1142  bool __hleu(__half x, __half y) {
1143  return !(static_cast<__half_raw>(x).data > static_cast<__half_raw>(y).data);
1144  }
1145  inline
1146  __device__
1147  bool __hgeu(__half x, __half y) {
1148  return !(static_cast<__half_raw>(x).data < static_cast<__half_raw>(y).data);
1149  }
1150  inline
1151  __device__
1152  bool __hltu(__half x, __half y) {
1153  return !(static_cast<__half_raw>(x).data >= static_cast<__half_raw>(y).data);
1154  }
1155  inline
1156  __device__
1157  bool __hgtu(__half x, __half y) {
1158  return !(static_cast<__half_raw>(x).data <= static_cast<__half_raw>(y).data);
1159  }
1160 
1161  inline
1162  __HOST_DEVICE__
1163  __half2 __heq2(__half2 x, __half2 y)
1164  {
1165  auto r = static_cast<__half2_raw>(x).data ==
1166  static_cast<__half2_raw>(y).data;
1167  return __builtin_convertvector(-r, _Float16_2);
1168  }
1169  inline
1170  __HOST_DEVICE__
1171  __half2 __hne2(__half2 x, __half2 y)
1172  {
1173  auto r = static_cast<__half2_raw>(x).data !=
1174  static_cast<__half2_raw>(y).data;
1175  return __builtin_convertvector(-r, _Float16_2);
1176  }
1177  inline
1178  __HOST_DEVICE__
1179  __half2 __hle2(__half2 x, __half2 y)
1180  {
1181  auto r = static_cast<__half2_raw>(x).data <=
1182  static_cast<__half2_raw>(y).data;
1183  return __builtin_convertvector(-r, _Float16_2);
1184  }
1185  inline
1186  __HOST_DEVICE__
1187  __half2 __hge2(__half2 x, __half2 y)
1188  {
1189  auto r = static_cast<__half2_raw>(x).data >=
1190  static_cast<__half2_raw>(y).data;
1191  return __builtin_convertvector(-r, _Float16_2);
1192  }
1193  inline
1194  __HOST_DEVICE__
1195  __half2 __hlt2(__half2 x, __half2 y)
1196  {
1197  auto r = static_cast<__half2_raw>(x).data <
1198  static_cast<__half2_raw>(y).data;
1199  return __builtin_convertvector(-r, _Float16_2);
1200  }
1201  inline
1202  __HOST_DEVICE__
1203  __half2 __hgt2(__half2 x, __half2 y)
1204  {
1205  auto r = static_cast<__half2_raw>(x).data >
1206  static_cast<__half2_raw>(y).data;
1207  return __builtin_convertvector(-r, _Float16_2);
1208  }
1209  inline __HOST_DEVICE__
1210  __half2 __hequ2(__half2 x, __half2 y) {
1211  auto r = !(static_cast<__half2_raw>(x).data < static_cast<__half2_raw>(y).data) &&
1212  !(static_cast<__half2_raw>(x).data > static_cast<__half2_raw>(y).data);
1213  return __builtin_convertvector(-r, _Float16_2);
1214  }
1215  inline
1216  __HOST_DEVICE__
1217  __half2 __hneu2(__half2 x, __half2 y) {
1218  auto r = !(static_cast<__half2_raw>(x).data == static_cast<__half2_raw>(y).data);
1219  return __builtin_convertvector(-r, _Float16_2);
1220  }
1221  inline
1222  __HOST_DEVICE__
1223  __half2 __hleu2(__half2 x, __half2 y) {
1224  auto r = !(static_cast<__half2_raw>(x).data > static_cast<__half2_raw>(y).data);
1225  return __builtin_convertvector(-r, _Float16_2);
1226  }
1227  inline
1228  __HOST_DEVICE__
1229  __half2 __hgeu2(__half2 x, __half2 y) {
1230  auto r = !(static_cast<__half2_raw>(x).data < static_cast<__half2_raw>(y).data);
1231  return __builtin_convertvector(-r, _Float16_2);
1232  }
1233  inline
1234  __HOST_DEVICE__
1235  __half2 __hltu2(__half2 x, __half2 y) {
1236  auto r = !(static_cast<__half2_raw>(x).data >= static_cast<__half2_raw>(y).data);
1237  return __builtin_convertvector(-r, _Float16_2);
1238  }
1239  inline
1240  __HOST_DEVICE__
1241  __half2 __hgtu2(__half2 x, __half2 y) {
1242  auto r = !(static_cast<__half2_raw>(x).data <= static_cast<__half2_raw>(y).data);
1243  return __builtin_convertvector(-r, _Float16_2);
1244  }
1245 
1246  inline
1247  __HOST_DEVICE__
1248  bool __hbeq2(__half2 x, __half2 y)
1249  {
1250  auto r = static_cast<__half2_raw>(__heq2(x, y));
1251  return r.data.x != 0 && r.data.y != 0;
1252  }
1253  inline
1254  __HOST_DEVICE__
1255  bool __hbne2(__half2 x, __half2 y)
1256  {
1257  auto r = static_cast<__half2_raw>(__hne2(x, y));
1258  return r.data.x != 0 && r.data.y != 0;
1259  }
1260  inline
1261  __HOST_DEVICE__
1262  bool __hble2(__half2 x, __half2 y)
1263  {
1264  auto r = static_cast<__half2_raw>(__hle2(x, y));
1265  return r.data.x != 0 && r.data.y != 0;
1266  }
1267  inline
1268  __HOST_DEVICE__
1269  bool __hbge2(__half2 x, __half2 y)
1270  {
1271  auto r = static_cast<__half2_raw>(__hge2(x, y));
1272  return r.data.x != 0 && r.data.y != 0;
1273  }
1274  inline
1275  __HOST_DEVICE__
1276  bool __hblt2(__half2 x, __half2 y)
1277  {
1278  auto r = static_cast<__half2_raw>(__hlt2(x, y));
1279  return r.data.x != 0 && r.data.y != 0;
1280  }
1281  inline
1282  __HOST_DEVICE__
1283  bool __hbgt2(__half2 x, __half2 y)
1284  {
1285  auto r = static_cast<__half2_raw>(__hgt2(x, y));
1286  return r.data.x != 0 && r.data.y != 0;
1287  }
1288  inline
1289  __HOST_DEVICE__
1290  bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
1291  inline
1292  __HOST_DEVICE__
1293  bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
1294  inline
1295  __HOST_DEVICE__
1296  bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
1297  inline
1298  __HOST_DEVICE__
1299  bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
1300  inline
1301  __HOST_DEVICE__
1302  bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
1303  inline
1304  __HOST_DEVICE__
1305  bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
1306  inline
1307  __device__
1308  __half __hmax(const __half x, const __half y) {
1309  return __half_raw{__ocml_fmax_f16(static_cast<__half_raw>(x).data,
1310  static_cast<__half_raw>(y).data)};
1311  }
1312  inline
1313  __device__
1314  __half __hmax_nan(const __half x, const __half y) {
1315  if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) {
1316  return x;
1317  } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) {
1318  return y;
1319  }
1320  return __hmax(x, y);
1321  }
1322  inline
1323  __device__
1324  __half __hmin(const __half x, const __half y) {
1325  return __half_raw{__ocml_fmin_f16(static_cast<__half_raw>(x).data,
1326  static_cast<__half_raw>(y).data)};
1327  }
1328  inline
1329  __device__
1330  __half __hmin_nan(const __half x, const __half y) {
1331  if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) {
1332  return x;
1333  } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) {
1334  return y;
1335  }
1336  return __hmin(x, y);
1337  }
1338 
1339  // Arithmetic
1340  inline
1341  __device__
1342  __half __clamp_01(__half x)
1343  {
1344  auto r = static_cast<__half_raw>(x);
1345 
1346  if (__hlt(x, __half_raw{0})) return __half_raw{0};
1347  if (__hlt(__half_raw{1}, x)) return __half_raw{1};
1348  return r;
1349  }
1350 
1351  inline
1352  __device__
1353  __half __hadd(__half x, __half y)
1354  {
1355  return __half_raw{
1356  static_cast<__half_raw>(x).data +
1357  static_cast<__half_raw>(y).data};
1358  }
1359  inline
1360  __device__
1361  __half __habs(__half x)
1362  {
1363  return __half_raw{
1364  __ocml_fabs_f16(static_cast<__half_raw>(x).data)};
1365  }
1366  inline
1367  __device__
1368  __half __hsub(__half x, __half y)
1369  {
1370  return __half_raw{
1371  static_cast<__half_raw>(x).data -
1372  static_cast<__half_raw>(y).data};
1373  }
1374  inline
1375  __device__
1376  __half __hmul(__half x, __half y)
1377  {
1378  return __half_raw{
1379  static_cast<__half_raw>(x).data *
1380  static_cast<__half_raw>(y).data};
1381  }
1382  inline
1383  __device__
1384  __half __hadd_sat(__half x, __half y)
1385  {
1386  return __clamp_01(__hadd(x, y));
1387  }
1388  inline
1389  __device__
1390  __half __hsub_sat(__half x, __half y)
1391  {
1392  return __clamp_01(__hsub(x, y));
1393  }
1394  inline
1395  __device__
1396  __half __hmul_sat(__half x, __half y)
1397  {
1398  return __clamp_01(__hmul(x, y));
1399  }
1400  inline
1401  __device__
1402  __half __hfma(__half x, __half y, __half z)
1403  {
1404  return __half_raw{__ocml_fma_f16(
1405  static_cast<__half_raw>(x).data,
1406  static_cast<__half_raw>(y).data,
1407  static_cast<__half_raw>(z).data)};
1408  }
1409  inline
1410  __device__
1411  __half __hfma_sat(__half x, __half y, __half z)
1412  {
1413  return __clamp_01(__hfma(x, y, z));
1414  }
1415  inline
1416  __device__
1417  __half __hdiv(__half x, __half y)
1418  {
1419  return __half_raw{
1420  static_cast<__half_raw>(x).data /
1421  static_cast<__half_raw>(y).data};
1422  }
1423 
1424  inline
1425  __HOST_DEVICE__
1426  __half2 __hadd2(__half2 x, __half2 y)
1427  {
1428  return __half2{
1429  static_cast<__half2_raw>(x).data +
1430  static_cast<__half2_raw>(y).data};
1431  }
1432  inline
1433  __HOST_DEVICE__
1434  __half2 __habs2(__half2 x)
1435  {
1436  return __half2{
1437  __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
1438  }
1439  inline
1440  __HOST_DEVICE__
1441  __half2 __hsub2(__half2 x, __half2 y)
1442  {
1443  return __half2{
1444  static_cast<__half2_raw>(x).data -
1445  static_cast<__half2_raw>(y).data};
1446  }
1447  inline
1448  __HOST_DEVICE__
1449  __half2 __hmul2(__half2 x, __half2 y)
1450  {
1451  return __half2{
1452  static_cast<__half2_raw>(x).data *
1453  static_cast<__half2_raw>(y).data};
1454  }
1455  inline
1456  __HOST_DEVICE__
1457  __half2 __hadd2_sat(__half2 x, __half2 y)
1458  {
1459  auto r = static_cast<__half2_raw>(__hadd2(x, y));
1460  return __half2{
1461  __clamp_01(__half_raw{r.data.x}),
1462  __clamp_01(__half_raw{r.data.y})};
1463  }
1464  inline
1465  __HOST_DEVICE__
1466  __half2 __hsub2_sat(__half2 x, __half2 y)
1467  {
1468  auto r = static_cast<__half2_raw>(__hsub2(x, y));
1469  return __half2{
1470  __clamp_01(__half_raw{r.data.x}),
1471  __clamp_01(__half_raw{r.data.y})};
1472  }
1473  inline
1474  __HOST_DEVICE__
1475  __half2 __hmul2_sat(__half2 x, __half2 y)
1476  {
1477  auto r = static_cast<__half2_raw>(__hmul2(x, y));
1478  return __half2{
1479  __clamp_01(__half_raw{r.data.x}),
1480  __clamp_01(__half_raw{r.data.y})};
1481  }
1482  inline
1483  __HOST_DEVICE__
1484  __half2 __hfma2(__half2 x, __half2 y, __half2 z)
1485  {
1486  return __half2{__ocml_fma_2f16(x, y, z)};
1487  }
1488  inline
1489  __HOST_DEVICE__
1490  __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1491  {
1492  auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
1493  return __half2{
1494  __clamp_01(__half_raw{r.data.x}),
1495  __clamp_01(__half_raw{r.data.y})};
1496  }
1497  inline
1498  __HOST_DEVICE__
1499  __half2 __h2div(__half2 x, __half2 y)
1500  {
1501  return __half2{
1502  static_cast<__half2_raw>(x).data /
1503  static_cast<__half2_raw>(y).data};
1504  }
1505 
1506  // Math functions
1507  #if defined(__clang__) && defined(__HIP__)
1508  inline
1509  __device__
1510  float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
1511  return __ockl_fdot2(static_cast<__half2_raw>(a).data,
1512  static_cast<__half2_raw>(b).data,
1513  c, saturate);
1514  }
1515  #endif
1516  inline
1517  __device__
1518  __half htrunc(__half x)
1519  {
1520  return __half_raw{
1521  __ocml_trunc_f16(static_cast<__half_raw>(x).data)};
1522  }
1523  inline
1524  __device__
1525  __half hceil(__half x)
1526  {
1527  return __half_raw{
1528  __ocml_ceil_f16(static_cast<__half_raw>(x).data)};
1529  }
1530  inline
1531  __device__
1532  __half hfloor(__half x)
1533  {
1534  return __half_raw{
1535  __ocml_floor_f16(static_cast<__half_raw>(x).data)};
1536  }
1537  inline
1538  __device__
1539  __half hrint(__half x)
1540  {
1541  return __half_raw{
1542  __ocml_rint_f16(static_cast<__half_raw>(x).data)};
1543  }
1544  inline
1545  __device__
1546  __half hsin(__half x)
1547  {
1548  return __half_raw{
1549  __ocml_sin_f16(static_cast<__half_raw>(x).data)};
1550  }
1551  inline
1552  __device__
1553  __half hcos(__half x)
1554  {
1555  return __half_raw{
1556  __ocml_cos_f16(static_cast<__half_raw>(x).data)};
1557  }
1558  inline
1559  __device__
1560  __half hexp(__half x)
1561  {
1562  return __half_raw{
1563  __ocml_exp_f16(static_cast<__half_raw>(x).data)};
1564  }
1565  inline
1566  __device__
1567  __half hexp2(__half x)
1568  {
1569  return __half_raw{
1570  __ocml_exp2_f16(static_cast<__half_raw>(x).data)};
1571  }
1572  inline
1573  __device__
1574  __half hexp10(__half x)
1575  {
1576  return __half_raw{
1577  __ocml_exp10_f16(static_cast<__half_raw>(x).data)};
1578  }
1579  inline
1580  __device__
1581  __half hlog2(__half x)
1582  {
1583  return __half_raw{
1584  __ocml_log2_f16(static_cast<__half_raw>(x).data)};
1585  }
1586  inline
1587  __device__
1588  __half hlog(__half x)
1589  {
1590  return __half_raw{
1591  __ocml_log_f16(static_cast<__half_raw>(x).data)};
1592  }
1593  inline
1594  __device__
1595  __half hlog10(__half x)
1596  {
1597  return __half_raw{
1598  __ocml_log10_f16(static_cast<__half_raw>(x).data)};
1599  }
1600  inline
1601  __device__
1602  __half hrcp(__half x)
1603  {
1604  return __half_raw{
1605  static_cast<_Float16>(1.0f) /static_cast<__half_raw>(x).data};
1606  }
1607  inline
1608  __device__
1609  __half hrsqrt(__half x)
1610  {
1611  return __half_raw{
1612  __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)};
1613  }
1614  inline
1615  __device__
1616  __half hsqrt(__half x)
1617  {
1618  return __half_raw{
1619  __ocml_sqrt_f16(static_cast<__half_raw>(x).data)};
1620  }
1621  inline
1622  __device__
1623  bool __hisinf(__half x)
1624  {
1625  return __ocml_isinf_f16(static_cast<__half_raw>(x).data);
1626  }
1627  inline
1628  __device__
1629  bool __hisnan(__half x)
1630  {
1631  return __ocml_isnan_f16(static_cast<__half_raw>(x).data);
1632  }
1633  inline
1634  __device__
1635  __half __hneg(__half x)
1636  {
1637  return __half_raw{-static_cast<__half_raw>(x).data};
1638  }
1639 
1640  inline
1641  __HOST_DEVICE__
1642  __half2 h2trunc(__half2 x)
1643  {
1644  return __half2{__ocml_trunc_2f16(x)};
1645  }
1646  inline
1647  __HOST_DEVICE__
1648  __half2 h2ceil(__half2 x)
1649  {
1650  return __half2{__ocml_ceil_2f16(x)};
1651  }
1652  inline
1653  __HOST_DEVICE__
1654  __half2 h2floor(__half2 x)
1655  {
1656  return __half2{__ocml_floor_2f16(x)};
1657  }
1658  inline
1659  __HOST_DEVICE__
1660  __half2 h2rint(__half2 x)
1661  {
1662  return __half2{__ocml_rint_2f16(x)};
1663  }
1664  inline
1665  __HOST_DEVICE__
1666  __half2 h2sin(__half2 x)
1667  {
1668  return __half2{__ocml_sin_2f16(x)};
1669  }
1670  inline
1671  __HOST_DEVICE__
1672  __half2 h2cos(__half2 x)
1673  {
1674  return __half2{__ocml_cos_2f16(x)};
1675  }
1676  inline
1677  __HOST_DEVICE__
1678  __half2 h2exp(__half2 x)
1679  {
1680  return __half2{__ocml_exp_2f16(x)};
1681  }
1682  inline
1683  __HOST_DEVICE__
1684  __half2 h2exp2(__half2 x)
1685  {
1686  return __half2{__ocml_exp2_2f16(x)};
1687  }
1688  inline
1689  __HOST_DEVICE__
1690  __half2 h2exp10(__half2 x)
1691  {
1692  return __half2{__ocml_exp10_2f16(x)};
1693  }
1694  inline
1695  __HOST_DEVICE__
1696  __half2 h2log2(__half2 x)
1697  {
1698  return __half2{__ocml_log2_2f16(x)};
1699  }
1700  inline
1701  __HOST_DEVICE__
1702  __half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
1703  inline
1704  __HOST_DEVICE__
1705  __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
1706  inline
1707  __HOST_DEVICE__
1708  __half2 h2rcp(__half2 x) {
1709  return _Float16_2{
1710  _Float16_2{static_cast<_Float16>(1.0f), static_cast<_Float16>(1.0f)} / x.data};
1711  }
1712  inline
1713  __HOST_DEVICE__
1714  __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
1715  inline
1716  __HOST_DEVICE__
1717  __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
1718  inline
1719  __HOST_DEVICE__
1720  __half2 __hisinf2(__half2 x)
1721  {
1722  auto r = __ocml_isinf_2f16(x);
1723  return __half2{_Float16_2{
1724  static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
1725  }
1726  inline
1727  __HOST_DEVICE__
1728  __half2 __hisnan2(__half2 x)
1729  {
1730  auto r = __ocml_isnan_2f16(x);
1731  return __half2{_Float16_2{
1732  static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
1733  }
1734  inline
1735  __HOST_DEVICE__
1736  __half2 __hneg2(__half2 x)
1737  {
1738  return __half2{-static_cast<__half2_raw>(x).data};
1739  }
1740  } // Anonymous namespace.
1741 
1742  #if !defined(HIP_NO_HALF)
1743  using half = __half;
1744  using half2 = __half2;
1745  #endif
1746  __device__
1747  inline
1748  __half __shfl(__half var, int src_lane, int width = warpSize) {
1749  union { int i; __half h; } tmp; tmp.h = var;
1750  tmp.i = __shfl(tmp.i, src_lane, width);
1751  return tmp.h;
1752  }
1753  __device__
1754  inline
1755  __half2 __shfl(__half2 var, int src_lane, int width = warpSize) {
1756  union { int i; __half2 h; } tmp; tmp.h = var;
1757  tmp.i = __shfl(tmp.i, src_lane, width);
1758  return tmp.h;
1759  }
1760  __device__
1761  inline
1762  __half __shfl_up(__half var, unsigned int lane_delta, int width = warpSize) {
1763  union { int i; __half h; } tmp; tmp.h = var;
1764  tmp.i = __shfl_up(tmp.i, lane_delta, width);
1765  return tmp.h;
1766  }
1767  __device__
1768  inline
1769  __half2 __shfl_up(__half2 var, unsigned int lane_delta, int width = warpSize) {
1770  union { int i; __half2 h; } tmp; tmp.h = var;
1771  tmp.i = __shfl_up(tmp.i, lane_delta, width);
1772  return tmp.h;
1773  }
1774  __device__
1775  inline
1776  __half __shfl_down(__half var, unsigned int lane_delta, int width = warpSize) {
1777  union { int i; __half h; } tmp; tmp.h = var;
1778  tmp.i = __shfl_down(tmp.i, lane_delta, width);
1779  return tmp.h;
1780  }
1781  __device__
1782  inline
1783  __half2 __shfl_down(__half2 var, unsigned int lane_delta, int width = warpSize) {
1784  union { int i; __half2 h; } tmp; tmp.h = var;
1785  tmp.i = __shfl_down(tmp.i, lane_delta, width);
1786  return tmp.h;
1787  }
1788  __device__
1789  inline
1790  __half __shfl_xor(__half var, int lane_mask, int width = warpSize) {
1791  union { int i; __half h; } tmp; tmp.h = var;
1792  tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1793  return tmp.h;
1794  }
1795  __device__
1796  inline
1797  __half2 __shfl_xor(__half2 var, int lane_mask, int width = warpSize) {
1798  union { int i; __half2 h; } tmp; tmp.h = var;
1799  tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1800  return tmp.h;
1801  }
1802  #endif // defined(__cplusplus)
1803 #elif defined(__GNUC__)
1804  #if !defined(__HIPCC_RTC__)
1805  #include "hip_fp16_gcc.h"
1806  #endif
1807 #endif // !defined(__clang__) && defined(__GNUC__)
1808 
1809 #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
#define __host__
Definition: host_defines.h:170
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition: amd_hip_bf16.h:681
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition: amd_hip_bf16.h:835
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:921
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:826
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition: amd_hip_bf16.h:850
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition: amd_hip_bf16.h:715
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:903
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition: amd_hip_bf16.h:673
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:817
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition: amd_hip_bf16.h:697
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition: amd_hip_bf16.h:707
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition: amd_hip_bf16.h:865
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition: amd_hip_bf16.h:725
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition: amd_hip_bf16.h:884
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition: amd_hip_bf16.h:689
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition: amd_hip_bf16.h:1067
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition: amd_hip_bf16.h:1084
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition: amd_hip_bf16.h:1116
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1463
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition: amd_hip_bf16.h:1172
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1471
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1495
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition: amd_hip_bf16.h:1144
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition: amd_hip_bf16.h:1100
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition: amd_hip_bf16.h:1180
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition: amd_hip_bf16.h:1132
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1487
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition: amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition: amd_hip_bf16.h:1124
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition: amd_hip_bf16.h:1092
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1479
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition: amd_hip_bf16.h:1164
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition: amd_hip_bf16.h:1108
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition: amd_hip_bf16.h:1156
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition: amd_hip_bf16.h:1503
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition: amd_hip_bf16.h:1197
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition: amd_hip_bf16.h:1075
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition: amd_hip_bf16.h:1239
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition: amd_hip_bf16.h:1250
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition: amd_hip_bf16.h:1272
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition: amd_hip_bf16.h:1294
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition: amd_hip_bf16.h:1283
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition: amd_hip_bf16.h:1448
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition: amd_hip_bf16.h:1354
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition: amd_hip_bf16.h:1316
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition: amd_hip_bf16.h:1409
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition: amd_hip_bf16.h:1305
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition: amd_hip_bf16.h:1217
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition: amd_hip_bf16.h:1206
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition: amd_hip_bf16.h:1394
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition: amd_hip_bf16.h:1328
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition: amd_hip_bf16.h:1261
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition: amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition: amd_hip_bf16.h:1339
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition: amd_hip_bf16.h:1369
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition: amd_hip_bf16.h:1384
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition: amd_hip_bf16.h:749
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition: amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition: amd_hip_bf16.h:784
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition: amd_hip_bf16.h:770
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition: amd_hip_bf16.h:758
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition: amd_hip_bf16.h:796
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition: amd_hip_bf16.h:735
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition: amd_hip_bf16.h:628
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition: amd_hip_bf16.h:590
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition: amd_hip_bf16.h:637
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition: amd_hip_bf16.h:1588
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition: amd_hip_bf16.h:1612
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition: amd_hip_bf16.h:1596
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition: amd_hip_bf16.h:1564
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition: amd_hip_bf16.h:1636
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition: amd_hip_bf16.h:1668
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition: amd_hip_bf16.h:1620
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition: amd_hip_bf16.h:1660
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition: amd_hip_bf16.h:1604
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition: amd_hip_bf16.h:1644
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition: amd_hip_bf16.h:1676
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition: amd_hip_bf16.h:1652
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition: amd_hip_bf16.h:1572
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition: amd_hip_bf16.h:1628
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition: amd_hip_bf16.h:1580
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition: amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition: amd_hip_bf16.h:1765
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition: amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition: amd_hip_bf16.h:1702
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition: amd_hip_bf16.h:1693
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition: amd_hip_bf16.h:1792
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition: amd_hip_bf16.h:1756
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition: amd_hip_bf16.h:1684
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition: amd_hip_bf16.h:1729
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition: amd_hip_bf16.h:1711
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition: amd_hip_bf16.h:1720
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition: amd_hip_bf16.h:1747
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition: amd_hip_bf16.h:1783
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition: amd_hip_bf16.h:1774
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition: amd_hip_bf16.h:1801
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition: hip_fp16_math_fwd.h:57
Definition: amd_hip_vector_types.h:2035
Definition: hip_fp16_gcc.h:7
Definition: hip_fp16_gcc.h:11