HIP: Heterogenous-computing Interface for Portability
amd_hip_vector_types.h
1 /*
2 Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
28 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
30 
32 
33 #if defined(__HIPCC_RTC__)
34  #define __HOST_DEVICE__ __device__
35 #else
36  #define __HOST_DEVICE__ __host__ __device__
37 #endif
38 
39 #if defined(__has_attribute)
40  #if __has_attribute(ext_vector_type)
41  #define __HIP_USE_NATIVE_VECTOR__ 1
42  #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
43  #else
44  #define __NATIVE_VECTOR__(n, T) T[n]
45  #endif
46 
47 #if defined(__cplusplus)
48 #if !defined(__HIPCC_RTC__)
49  #include <array>
50  #include <iosfwd>
51  #include <type_traits>
52 #else
53 namespace std {
54 using ::size_t;
55 
56 template <class _Tp, _Tp __v> struct integral_constant {
57  static constexpr const _Tp value = __v;
58  typedef _Tp value_type;
59  typedef integral_constant type;
60  constexpr operator value_type() const { return value; }
61  constexpr value_type operator()() const { return value; }
62 };
63 template <class _Tp, _Tp __v> constexpr const _Tp integral_constant<_Tp, __v>::value;
64 
65 typedef integral_constant<bool, true> true_type;
66 typedef integral_constant<bool, false> false_type;
67 
68 template <bool B> using bool_constant = integral_constant<bool, B>;
69 typedef bool_constant<true> true_type;
70 typedef bool_constant<false> false_type;
71 
72 template <bool __B, class __T = void> struct enable_if {};
73 template <class __T> struct enable_if<true, __T> { typedef __T type; };
74 
75 template<bool _B> struct true_or_false_type : public false_type {};
76 template<> struct true_or_false_type<true> : public true_type {};
77 
78 template <class _Tp> struct is_integral : public false_type {};
79 template <> struct is_integral<bool> : public true_type {};
80 template <> struct is_integral<char> : public true_type {};
81 template <> struct is_integral<signed char> : public true_type {};
82 template <> struct is_integral<unsigned char> : public true_type {};
83 template <> struct is_integral<wchar_t> : public true_type {};
84 template <> struct is_integral<short> : public true_type {};
85 template <> struct is_integral<unsigned short> : public true_type {};
86 template <> struct is_integral<int> : public true_type {};
87 template <> struct is_integral<unsigned int> : public true_type {};
88 template <> struct is_integral<long> : public true_type {};
89 template <> struct is_integral<unsigned long> : public true_type {};
90 template <> struct is_integral<long long> : public true_type {};
91 template <> struct is_integral<unsigned long long> : public true_type {};
92 
93 template <class _Tp> struct is_arithmetic : public false_type {};
94 template <> struct is_arithmetic<bool> : public true_type {};
95 template <> struct is_arithmetic<char> : public true_type {};
96 template <> struct is_arithmetic<signed char> : public true_type {};
97 template <> struct is_arithmetic<unsigned char> : public true_type {};
98 template <> struct is_arithmetic<wchar_t> : public true_type {};
99 template <> struct is_arithmetic<short> : public true_type {};
100 template <> struct is_arithmetic<unsigned short> : public true_type {};
101 template <> struct is_arithmetic<int> : public true_type {};
102 template <> struct is_arithmetic<unsigned int> : public true_type {};
103 template <> struct is_arithmetic<long> : public true_type {};
104 template <> struct is_arithmetic<unsigned long> : public true_type {};
105 template <> struct is_arithmetic<long long> : public true_type {};
106 template <> struct is_arithmetic<unsigned long long> : public true_type {};
107 template <> struct is_arithmetic<float> : public true_type {};
108 template <> struct is_arithmetic<double> : public true_type {};
109 
110 template<typename _Tp> struct is_floating_point : public false_type {};
111 template<> struct is_floating_point<float> : public true_type {};
112 template<> struct is_floating_point<double> : public true_type {};
113 template<> struct is_floating_point<long double> : public true_type {};
114 
115 template <typename __T, typename __U> struct is_same : public false_type {};
116 template <typename __T> struct is_same<__T, __T> : public true_type {};
117 
118 template<typename _Tp, bool = is_arithmetic<_Tp>::value>
119  struct is_signed : public false_type {};
120 template<typename _Tp>
121  struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {};
122 
123 template <class _T1, class _T2> struct is_convertible
124  : public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
125 
126 template<typename _CharT> struct char_traits;
127 template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
128 template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
129 typedef basic_istream<char> istream;
130 typedef basic_ostream<char> ostream;
131 
132 template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
133 } // Namespace std.
134 #endif // defined(__HIPCC_RTC__)
135 
136  namespace hip_impl {
137  inline
138  constexpr
139  unsigned int next_pot(unsigned int x) {
140  // Precondition: x > 1.
141  return 1u << (32u - __builtin_clz(x - 1u));
142  }
143  } // Namespace hip_impl.
144 
145  template<typename T, unsigned int n> struct HIP_vector_base;
146 
147  template<typename T>
148  struct HIP_vector_base<T, 1> {
149  using Native_vec_ = __NATIVE_VECTOR__(1, T);
150 
151  union {
152  Native_vec_ data;
153  struct {
154  T x;
155  };
156  };
157 
158  using value_type = T;
159 
160  __HOST_DEVICE__
161  HIP_vector_base() = default;
162  __HOST_DEVICE__
163  explicit
164  constexpr
165  HIP_vector_base(T x_) noexcept : data{x_} {}
166  __HOST_DEVICE__
167  constexpr
168  HIP_vector_base(const HIP_vector_base&) = default;
169  __HOST_DEVICE__
170  constexpr
171  HIP_vector_base(HIP_vector_base&&) = default;
172  __HOST_DEVICE__
173  ~HIP_vector_base() = default;
174  __HOST_DEVICE__
175  HIP_vector_base& operator=(const HIP_vector_base&) = default;
176  };
177 
178  template<typename T>
179  struct HIP_vector_base<T, 2> {
180  using Native_vec_ = __NATIVE_VECTOR__(2, T);
181 
182  union
183  #if !__has_attribute(ext_vector_type)
184  alignas(hip_impl::next_pot(2 * sizeof(T)))
185  #endif
186  {
187  Native_vec_ data;
188  struct {
189  T x;
190  T y;
191  };
192  };
193 
194  using value_type = T;
195 
196  __HOST_DEVICE__
197  HIP_vector_base() = default;
198  __HOST_DEVICE__
199  explicit
200  constexpr
201  HIP_vector_base(T x_) noexcept : data{x_, x_} {}
202  __HOST_DEVICE__
203  constexpr
204  HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
205  __HOST_DEVICE__
206  constexpr
207  HIP_vector_base(const HIP_vector_base&) = default;
208  __HOST_DEVICE__
209  constexpr
210  HIP_vector_base(HIP_vector_base&&) = default;
211  __HOST_DEVICE__
212  ~HIP_vector_base() = default;
213  __HOST_DEVICE__
214  HIP_vector_base& operator=(const HIP_vector_base&) = default;
215  };
216 
217  template<typename T>
218  struct HIP_vector_base<T, 3> {
219  struct Native_vec_ {
220  T d[3];
221 
222  __HOST_DEVICE__
223  Native_vec_() = default;
224 
225  __HOST_DEVICE__
226  explicit
227  constexpr
228  Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
229  __HOST_DEVICE__
230  constexpr
231  Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
232  __HOST_DEVICE__
233  constexpr
234  Native_vec_(const Native_vec_&) = default;
235  __HOST_DEVICE__
236  constexpr
237  Native_vec_(Native_vec_&&) = default;
238  __HOST_DEVICE__
239  ~Native_vec_() = default;
240 
241  __HOST_DEVICE__
242  Native_vec_& operator=(const Native_vec_&) = default;
243  __HOST_DEVICE__
244  Native_vec_& operator=(Native_vec_&&) = default;
245 
246  __HOST_DEVICE__
247  T& operator[](unsigned int idx) noexcept { return d[idx]; }
248  __HOST_DEVICE__
249  T operator[](unsigned int idx) const noexcept { return d[idx]; }
250 
251  __HOST_DEVICE__
252  Native_vec_& operator+=(const Native_vec_& x_) noexcept
253  {
254  for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
255  return *this;
256  }
257  __HOST_DEVICE__
258  Native_vec_& operator-=(const Native_vec_& x_) noexcept
259  {
260  for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
261  return *this;
262  }
263 
264  __HOST_DEVICE__
265  Native_vec_& operator*=(const Native_vec_& x_) noexcept
266  {
267  for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
268  return *this;
269  }
270  __HOST_DEVICE__
271  Native_vec_& operator/=(const Native_vec_& x_) noexcept
272  {
273  for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
274  return *this;
275  }
276 
277  template<
278  typename U = T,
279  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
280  __HOST_DEVICE__
281  Native_vec_ operator-() const noexcept
282  {
283  auto r{*this};
284  for (auto&& x : r.d) x = -x;
285  return r;
286  }
287 
288  template<
289  typename U = T,
290  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
291  __HOST_DEVICE__
292  Native_vec_ operator~() const noexcept
293  {
294  auto r{*this};
295  for (auto&& x : r.d) x = ~x;
296  return r;
297  }
298  template<
299  typename U = T,
300  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
301  __HOST_DEVICE__
302  Native_vec_& operator%=(const Native_vec_& x_) noexcept
303  {
304  for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
305  return *this;
306  }
307  template<
308  typename U = T,
309  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
310  __HOST_DEVICE__
311  Native_vec_& operator^=(const Native_vec_& x_) noexcept
312  {
313  for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
314  return *this;
315  }
316  template<
317  typename U = T,
318  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
319  __HOST_DEVICE__
320  Native_vec_& operator|=(const Native_vec_& x_) noexcept
321  {
322  for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
323  return *this;
324  }
325  template<
326  typename U = T,
327  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
328  __HOST_DEVICE__
329  Native_vec_& operator&=(const Native_vec_& x_) noexcept
330  {
331  for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
332  return *this;
333  }
334  template<
335  typename U = T,
336  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
337  __HOST_DEVICE__
338  Native_vec_& operator>>=(const Native_vec_& x_) noexcept
339  {
340  for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
341  return *this;
342  }
343  template<
344  typename U = T,
345  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
346  __HOST_DEVICE__
347  Native_vec_& operator<<=(const Native_vec_& x_) noexcept
348  {
349  for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
350  return *this;
351  }
352 #if defined (__INTEL_COMPILER)
353  typedef struct {
354  int values[4];
355  } _Vec3_cmp;
356  using Vec3_cmp = _Vec3_cmp;
357 #else
358  using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
359 #endif //INTEL
360  __HOST_DEVICE__
361  Vec3_cmp operator==(const Native_vec_& x_) const noexcept
362  {
363  return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
364  }
365  };
366 
367  union {
368  Native_vec_ data;
369  struct {
370  T x;
371  T y;
372  T z;
373  };
374  };
375 
376  using value_type = T;
377 
378  __HOST_DEVICE__
379  HIP_vector_base() = default;
380  __HOST_DEVICE__
381  explicit
382  constexpr
383  HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
384  __HOST_DEVICE__
385  constexpr
386  HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
387  __HOST_DEVICE__
388  constexpr
389  HIP_vector_base(const HIP_vector_base&) = default;
390  __HOST_DEVICE__
391  constexpr
392  HIP_vector_base(HIP_vector_base&&) = default;
393  __HOST_DEVICE__
394  ~HIP_vector_base() = default;
395 
396  __HOST_DEVICE__
397  HIP_vector_base& operator=(const HIP_vector_base&) = default;
398  __HOST_DEVICE__
399  HIP_vector_base& operator=(HIP_vector_base&&) = default;
400  };
401 
402  template<typename T>
403  struct HIP_vector_base<T, 4> {
404  using Native_vec_ = __NATIVE_VECTOR__(4, T);
405 
406  union
407  #if !__has_attribute(ext_vector_type)
408  alignas(hip_impl::next_pot(4 * sizeof(T)))
409  #endif
410  {
411  Native_vec_ data;
412  struct {
413  T x;
414  T y;
415  T z;
416  T w;
417  };
418  };
419 
420  using value_type = T;
421 
422  __HOST_DEVICE__
423  HIP_vector_base() = default;
424  __HOST_DEVICE__
425  explicit
426  constexpr
427  HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
428  __HOST_DEVICE__
429  constexpr
430  HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
431  __HOST_DEVICE__
432  constexpr
433  HIP_vector_base(const HIP_vector_base&) = default;
434  __HOST_DEVICE__
435  constexpr
436  HIP_vector_base(HIP_vector_base&&) = default;
437  __HOST_DEVICE__
438  ~HIP_vector_base() = default;
439  __HOST_DEVICE__
440  HIP_vector_base& operator=(const HIP_vector_base&) = default;
441  };
442 
443  template<typename T, unsigned int rank>
444  struct HIP_vector_type : public HIP_vector_base<T, rank> {
445  using HIP_vector_base<T, rank>::data;
446  using typename HIP_vector_base<T, rank>::Native_vec_;
447 
448  __HOST_DEVICE__
449  HIP_vector_type() = default;
450  template<
451  typename U,
452  typename std::enable_if<
453  std::is_convertible<U, T>::value>::type* = nullptr>
454  __HOST_DEVICE__
455  explicit
456  constexpr
457  HIP_vector_type(U x_) noexcept
458  : HIP_vector_base<T, rank>{static_cast<T>(x_)}
459  {}
460  template< // TODO: constrain based on type as well.
461  typename... Us,
462  typename std::enable_if<
463  (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
464  __HOST_DEVICE__
465  constexpr
466  HIP_vector_type(Us... xs) noexcept
467  : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
468  {}
469  __HOST_DEVICE__
470  constexpr
471  HIP_vector_type(const HIP_vector_type&) = default;
472  __HOST_DEVICE__
473  constexpr
474  HIP_vector_type(HIP_vector_type&&) = default;
475  __HOST_DEVICE__
476  ~HIP_vector_type() = default;
477 
478  __HOST_DEVICE__
479  HIP_vector_type& operator=(const HIP_vector_type&) = default;
480  __HOST_DEVICE__
481  HIP_vector_type& operator=(HIP_vector_type&&) = default;
482 
483  // Operators
484  __HOST_DEVICE__
485  HIP_vector_type& operator++() noexcept
486  {
487  return *this += HIP_vector_type{1};
488  }
489  __HOST_DEVICE__
490  HIP_vector_type operator++(int) noexcept
491  {
492  auto tmp(*this);
493  ++*this;
494  return tmp;
495  }
496 
497  __HOST_DEVICE__
498  HIP_vector_type& operator--() noexcept
499  {
500  return *this -= HIP_vector_type{1};
501  }
502  __HOST_DEVICE__
503  HIP_vector_type operator--(int) noexcept
504  {
505  auto tmp(*this);
506  --*this;
507  return tmp;
508  }
509 
510  __HOST_DEVICE__
511  HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
512  {
513 #if __HIP_USE_NATIVE_VECTOR__
514  data += x.data;
515 #else
516  for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
517 #endif
518  return *this;
519  }
520  template<
521  typename U,
522  typename std::enable_if<
523  std::is_convertible<U, T>{}>::type* = nullptr>
524  __HOST_DEVICE__
525  HIP_vector_type& operator+=(U x) noexcept
526  {
527  return *this += HIP_vector_type{x};
528  }
529 
530  __HOST_DEVICE__
531  HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
532  {
533 #if __HIP_USE_NATIVE_VECTOR__
534  data -= x.data;
535 #else
536  for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
537 #endif
538  return *this;
539  }
540  template<
541  typename U,
542  typename std::enable_if<
543  std::is_convertible<U, T>{}>::type* = nullptr>
544  __HOST_DEVICE__
545  HIP_vector_type& operator-=(U x) noexcept
546  {
547  return *this -= HIP_vector_type{x};
548  }
549 
550  __HOST_DEVICE__
551  HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
552  {
553 #if __HIP_USE_NATIVE_VECTOR__
554  data *= x.data;
555 #else
556  for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
557 #endif
558  return *this;
559  }
560 
561  friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
562  HIP_vector_type x, const HIP_vector_type& y) noexcept
563  {
564  return HIP_vector_type{ x } *= y;
565  }
566 
567  template<
568  typename U,
569  typename std::enable_if<
570  std::is_convertible<U, T>{}>::type* = nullptr>
571  __HOST_DEVICE__
572  HIP_vector_type& operator*=(U x) noexcept
573  {
574  return *this *= HIP_vector_type{x};
575  }
576 
577  friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
578  HIP_vector_type x, const HIP_vector_type& y) noexcept
579  {
580  return HIP_vector_type{ x } /= y;
581  }
582 
583  __HOST_DEVICE__
584  HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
585  {
586 #if __HIP_USE_NATIVE_VECTOR__
587  data /= x.data;
588 #else
589  for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
590 #endif
591  return *this;
592  }
593  template<
594  typename U,
595  typename std::enable_if<
596  std::is_convertible<U, T>{}>::type* = nullptr>
597  __HOST_DEVICE__
598  HIP_vector_type& operator/=(U x) noexcept
599  {
600  return *this /= HIP_vector_type{x};
601  }
602 
603  template<
604  typename U = T,
605  typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
606  __HOST_DEVICE__
607  HIP_vector_type operator-() const noexcept
608  {
609  auto tmp(*this);
610 #if __HIP_USE_NATIVE_VECTOR__
611  tmp.data = -tmp.data;
612 #else
613  for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
614 #endif
615  return tmp;
616  }
617 
618  template<
619  typename U = T,
620  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
621  __HOST_DEVICE__
622  HIP_vector_type operator~() const noexcept
623  {
624  HIP_vector_type r{*this};
625 #if __HIP_USE_NATIVE_VECTOR__
626  r.data = ~r.data;
627 #else
628  for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
629 #endif
630  return r;
631  }
632 
633  template<
634  typename U = T,
635  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
636  __HOST_DEVICE__
637  HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
638  {
639 #if __HIP_USE_NATIVE_VECTOR__
640  data %= x.data;
641 #else
642  for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
643 #endif
644  return *this;
645  }
646 
647  template<
648  typename U = T,
649  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
650  __HOST_DEVICE__
651  HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
652  {
653 #if __HIP_USE_NATIVE_VECTOR__
654  data ^= x.data;
655 #else
656  for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
657 #endif
658  return *this;
659  }
660 
661  template<
662  typename U = T,
663  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
664  __HOST_DEVICE__
665  HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
666  {
667 #if __HIP_USE_NATIVE_VECTOR__
668  data |= x.data;
669 #else
670  for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
671 #endif
672  return *this;
673  }
674 
675  template<
676  typename U = T,
677  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
678  __HOST_DEVICE__
679  HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
680  {
681 #if __HIP_USE_NATIVE_VECTOR__
682  data &= x.data;
683 #else
684  for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
685 #endif
686  return *this;
687  }
688 
689  template<
690  typename U = T,
691  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
692  __HOST_DEVICE__
693  HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
694  {
695 #if __HIP_USE_NATIVE_VECTOR__
696  data >>= x.data;
697 #else
698  for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
699 #endif
700  return *this;
701  }
702 
703  template<
704  typename U = T,
705  typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
706  __HOST_DEVICE__
707  HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
708  {
709 #if __HIP_USE_NATIVE_VECTOR__
710  data <<= x.data;
711 #else
712  for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
713 #endif
714  return *this;
715  }
716  };
717 
718  template<typename T, unsigned int n>
719  __HOST_DEVICE__
720  inline
721  constexpr
722  HIP_vector_type<T, n> operator+(
723  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
724  {
725  return HIP_vector_type<T, n>{x} += y;
726  }
727  template<typename T, unsigned int n, typename U>
728  __HOST_DEVICE__
729  inline
730  constexpr
731  HIP_vector_type<T, n> operator+(
732  const HIP_vector_type<T, n>& x, U y) noexcept
733  {
734  return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
735  }
736  template<typename T, unsigned int n, typename U>
737  __HOST_DEVICE__
738  inline
739  constexpr
740  HIP_vector_type<T, n> operator+(
741  U x, const HIP_vector_type<T, n>& y) noexcept
742  {
743  return HIP_vector_type<T, n>{x} += y;
744  }
745 
746  template<typename T, unsigned int n>
747  __HOST_DEVICE__
748  inline
749  constexpr
750  HIP_vector_type<T, n> operator-(
751  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
752  {
753  return HIP_vector_type<T, n>{x} -= y;
754  }
755  template<typename T, unsigned int n, typename U>
756  __HOST_DEVICE__
757  inline
758  constexpr
759  HIP_vector_type<T, n> operator-(
760  const HIP_vector_type<T, n>& x, U y) noexcept
761  {
762  return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
763  }
764  template<typename T, unsigned int n, typename U>
765  __HOST_DEVICE__
766  inline
767  constexpr
768  HIP_vector_type<T, n> operator-(
769  U x, const HIP_vector_type<T, n>& y) noexcept
770  {
771  return HIP_vector_type<T, n>{x} -= y;
772  }
773 
774  template<typename T, unsigned int n, typename U>
775  __HOST_DEVICE__
776  inline
777  constexpr
778  HIP_vector_type<T, n> operator*(
779  const HIP_vector_type<T, n>& x, U y) noexcept
780  {
781  return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
782  }
783  template<typename T, unsigned int n, typename U>
784  __HOST_DEVICE__
785  inline
786  constexpr
787  HIP_vector_type<T, n> operator*(
788  U x, const HIP_vector_type<T, n>& y) noexcept
789  {
790  return HIP_vector_type<T, n>{x} *= y;
791  }
792 
793  template<typename T, unsigned int n, typename U>
794  __HOST_DEVICE__
795  inline
796  constexpr
797  HIP_vector_type<T, n> operator/(
798  const HIP_vector_type<T, n>& x, U y) noexcept
799  {
800  return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
801  }
802  template<typename T, unsigned int n, typename U>
803  __HOST_DEVICE__
804  inline
805  constexpr
806  HIP_vector_type<T, n> operator/(
807  U x, const HIP_vector_type<T, n>& y) noexcept
808  {
809  return HIP_vector_type<T, n>{x} /= y;
810  }
811 
812  template<typename V>
813  __HOST_DEVICE__
814  inline
815  constexpr
816  bool _hip_compare(const V& x, const V& y, int n) noexcept
817  {
818  return
819  (n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
820  }
821 
822  template<typename T, unsigned int n>
823  __HOST_DEVICE__
824  inline
825  constexpr
826  bool operator==(
827  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
828  {
829  return _hip_compare(x.data, y.data, n - 1);
830  }
831  template<typename T, unsigned int n, typename U>
832  __HOST_DEVICE__
833  inline
834  constexpr
835  bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
836  {
837  return x == HIP_vector_type<T, n>{y};
838  }
839  template<typename T, unsigned int n, typename U>
840  __HOST_DEVICE__
841  inline
842  constexpr
843  bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
844  {
845  return HIP_vector_type<T, n>{x} == y;
846  }
847 
848  template<typename T, unsigned int n>
849  __HOST_DEVICE__
850  inline
851  constexpr
852  bool operator!=(
853  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
854  {
855  return !(x == y);
856  }
857  template<typename T, unsigned int n, typename U>
858  __HOST_DEVICE__
859  inline
860  constexpr
861  bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
862  {
863  return !(x == y);
864  }
865  template<typename T, unsigned int n, typename U>
866  __HOST_DEVICE__
867  inline
868  constexpr
869  bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
870  {
871  return !(x == y);
872  }
873 
874  template<
875  typename T,
876  unsigned int n,
877  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
878  __HOST_DEVICE__
879  inline
880  constexpr
881  HIP_vector_type<T, n> operator%(
882  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
883  {
884  return HIP_vector_type<T, n>{x} %= y;
885  }
886  template<
887  typename T,
888  unsigned int n,
889  typename U,
890  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
891  __HOST_DEVICE__
892  inline
893  constexpr
894  HIP_vector_type<T, n> operator%(
895  const HIP_vector_type<T, n>& x, U y) noexcept
896  {
897  return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
898  }
899  template<
900  typename T,
901  unsigned int n,
902  typename U,
903  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
904  __HOST_DEVICE__
905  inline
906  constexpr
907  HIP_vector_type<T, n> operator%(
908  U x, const HIP_vector_type<T, n>& y) noexcept
909  {
910  return HIP_vector_type<T, n>{x} %= y;
911  }
912 
913  template<
914  typename T,
915  unsigned int n,
916  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
917  __HOST_DEVICE__
918  inline
919  constexpr
920  HIP_vector_type<T, n> operator^(
921  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
922  {
923  return HIP_vector_type<T, n>{x} ^= y;
924  }
925  template<
926  typename T,
927  unsigned int n,
928  typename U,
929  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
930  __HOST_DEVICE__
931  inline
932  constexpr
933  HIP_vector_type<T, n> operator^(
934  const HIP_vector_type<T, n>& x, U y) noexcept
935  {
936  return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
937  }
938  template<
939  typename T,
940  unsigned int n,
941  typename U,
942  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
943  __HOST_DEVICE__
944  inline
945  constexpr
946  HIP_vector_type<T, n> operator^(
947  U x, const HIP_vector_type<T, n>& y) noexcept
948  {
949  return HIP_vector_type<T, n>{x} ^= y;
950  }
951 
952  template<
953  typename T,
954  unsigned int n,
955  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
956  __HOST_DEVICE__
957  inline
958  constexpr
959  HIP_vector_type<T, n> operator|(
960  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
961  {
962  return HIP_vector_type<T, n>{x} |= y;
963  }
964  template<
965  typename T,
966  unsigned int n,
967  typename U,
968  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
969  __HOST_DEVICE__
970  inline
971  constexpr
972  HIP_vector_type<T, n> operator|(
973  const HIP_vector_type<T, n>& x, U y) noexcept
974  {
975  return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
976  }
977  template<
978  typename T,
979  unsigned int n,
980  typename U,
981  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
982  __HOST_DEVICE__
983  inline
984  constexpr
985  HIP_vector_type<T, n> operator|(
986  U x, const HIP_vector_type<T, n>& y) noexcept
987  {
988  return HIP_vector_type<T, n>{x} |= y;
989  }
990 
991  template<
992  typename T,
993  unsigned int n,
994  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
995  __HOST_DEVICE__
996  inline
997  constexpr
998  HIP_vector_type<T, n> operator&(
999  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1000  {
1001  return HIP_vector_type<T, n>{x} &= y;
1002  }
1003  template<
1004  typename T,
1005  unsigned int n,
1006  typename U,
1007  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1008  __HOST_DEVICE__
1009  inline
1010  constexpr
1011  HIP_vector_type<T, n> operator&(
1012  const HIP_vector_type<T, n>& x, U y) noexcept
1013  {
1014  return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1015  }
1016  template<
1017  typename T,
1018  unsigned int n,
1019  typename U,
1020  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1021  __HOST_DEVICE__
1022  inline
1023  constexpr
1024  HIP_vector_type<T, n> operator&(
1025  U x, const HIP_vector_type<T, n>& y) noexcept
1026  {
1027  return HIP_vector_type<T, n>{x} &= y;
1028  }
1029 
1030  template<
1031  typename T,
1032  unsigned int n,
1033  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1034  __HOST_DEVICE__
1035  inline
1036  constexpr
1037  HIP_vector_type<T, n> operator>>(
1038  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1039  {
1040  return HIP_vector_type<T, n>{x} >>= y;
1041  }
1042  template<
1043  typename T,
1044  unsigned int n,
1045  typename U,
1046  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1047  __HOST_DEVICE__
1048  inline
1049  constexpr
1050  HIP_vector_type<T, n> operator>>(
1051  const HIP_vector_type<T, n>& x, U y) noexcept
1052  {
1053  return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1054  }
1055  template<
1056  typename T,
1057  unsigned int n,
1058  typename U,
1059  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1060  __HOST_DEVICE__
1061  inline
1062  constexpr
1063  HIP_vector_type<T, n> operator>>(
1064  U x, const HIP_vector_type<T, n>& y) noexcept
1065  {
1066  return HIP_vector_type<T, n>{x} >>= y;
1067  }
1068 
1069  template<
1070  typename T,
1071  unsigned int n,
1072  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1073  __HOST_DEVICE__
1074  inline
1075  constexpr
1076  HIP_vector_type<T, n> operator<<(
1077  const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1078  {
1079  return HIP_vector_type<T, n>{x} <<= y;
1080  }
1081  template<
1082  typename T,
1083  unsigned int n,
1084  typename U,
1085  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1086  __HOST_DEVICE__
1087  inline
1088  constexpr
1089  HIP_vector_type<T, n> operator<<(
1090  const HIP_vector_type<T, n>& x, U y) noexcept
1091  {
1092  return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1093  }
1094  template<
1095  typename T,
1096  unsigned int n,
1097  typename U,
1098  typename std::enable_if<std::is_arithmetic<U>::value>::type,
1099  typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1100  __HOST_DEVICE__
1101  inline
1102  constexpr
1103  HIP_vector_type<T, n> operator<<(
1104  U x, const HIP_vector_type<T, n>& y) noexcept
1105  {
1106  return HIP_vector_type<T, n>{x} <<= y;
1107  }
1108 
1109  /*
1110  * Map HIP_vector_type<U, rankU> to HIP_vector_type<T, rankT>
1111  */
1112  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1113  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 1 && rankU >= 1),
1114  const HIP_vector_type<T, rankT>>::type
1115  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1116  return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
1117  };
1118 
1119  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1120  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU == 1),
1121  const HIP_vector_type<T, rankT>>::type
1122  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1123  return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0));
1124  };
1125 
1126  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1127  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU >= 2),
1128  const HIP_vector_type<T, rankT>>::type
1129  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1130  return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y));
1131  };
1132 
1133  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1134  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 1),
1135  const HIP_vector_type<T, rankT>>::type
1136  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1137  return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0),
1138  static_cast<T>(0), static_cast<T>(0));
1139  };
1140 
1141  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1142  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 2),
1143  const HIP_vector_type<T, rankT>>::type
1144  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1145  return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y),
1146  static_cast<T>(0), static_cast<T>(0));
1147  };
1148 
1149  template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1150  __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 4),
1151  const HIP_vector_type<T, rankT>>::type
1152  __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1153  return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y),
1154  static_cast<T>(u.z), static_cast<T>(u.w));
1155  };
1156 
1157  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1158  using CUDA_name##1 = HIP_vector_type<T, 1>;\
1159  using CUDA_name##2 = HIP_vector_type<T, 2>;\
1160  using CUDA_name##3 = HIP_vector_type<T, 3>;\
1161  using CUDA_name##4 = HIP_vector_type<T, 4>;
1162 #else
1163  #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1164  typedef struct {\
1165  T x;\
1166  } CUDA_name##1;\
1167  typedef struct {\
1168  T x;\
1169  T y;\
1170  } CUDA_name##2;\
1171  typedef struct {\
1172  T x;\
1173  T y;\
1174  T z;\
1175  } CUDA_name##3;\
1176  typedef struct {\
1177  T x;\
1178  T y;\
1179  T z;\
1180  T w;\
1181  } CUDA_name##4;
1182 #endif
1183 
1184 __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1185 __MAKE_VECTOR_TYPE__(char, char);
1186 __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1187 __MAKE_VECTOR_TYPE__(short, short);
1188 __MAKE_VECTOR_TYPE__(uint, unsigned int);
1189 __MAKE_VECTOR_TYPE__(int, int);
1190 __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1191 __MAKE_VECTOR_TYPE__(long, long);
1192 __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1193 __MAKE_VECTOR_TYPE__(longlong, long long);
1194 __MAKE_VECTOR_TYPE__(float, float);
1195 __MAKE_VECTOR_TYPE__(double, double);
1196 
1197 #else // !defined(__has_attribute)
1198 
1199 #if defined(_MSC_VER)
1200 #include <mmintrin.h>
1201 #include <xmmintrin.h>
1202 #include <emmintrin.h>
1203 #include <immintrin.h>
1204 
1205 /*
1206 this is for compatibility with CUDA as CUDA allows accessing vector components
1207 in C++ program with MSVC
1208 */
1209 typedef union {
1210  struct {
1211  char x;
1212  };
1213  char data;
1214 } char1;
1215 typedef union {
1216  struct {
1217  char x;
1218  char y;
1219  };
1220  char data[2];
1221 } char2;
1222 typedef union {
1223  struct {
1224  char x;
1225  char y;
1226  char z;
1227  char w;
1228  };
1229  char data[4];
1230 } char4;
1231 typedef union {
1232  struct {
1233  char x;
1234  char y;
1235  char z;
1236  };
1237  char data[3];
1238 } char3;
1239 typedef union {
1240  __m64 data;
1241 } char8;
1242 typedef union {
1243  __m128i data;
1244 } char16;
1245 
1246 typedef union {
1247  struct {
1248  unsigned char x;
1249  };
1250  unsigned char data;
1251 } uchar1;
1252 typedef union {
1253  struct {
1254  unsigned char x;
1255  unsigned char y;
1256  };
1257  unsigned char data[2];
1258 } uchar2;
1259 typedef union {
1260  struct {
1261  unsigned char x;
1262  unsigned char y;
1263  unsigned char z;
1264  unsigned char w;
1265  };
1266  unsigned char data[4];
1267 } uchar4;
1268 typedef union {
1269  struct {
1270  unsigned char x;
1271  unsigned char y;
1272  unsigned char z;
1273  };
1274  unsigned char data[3];
1275 } uchar3;
1276 typedef union {
1277  __m64 data;
1278 } uchar8;
1279 typedef union {
1280  __m128i data;
1281 } uchar16;
1282 
1283 typedef union {
1284  struct {
1285  short x;
1286  };
1287  short data;
1288 } short1;
1289 typedef union {
1290  struct {
1291  short x;
1292  short y;
1293  };
1294  short data[2];
1295 } short2;
1296 typedef union {
1297  struct {
1298  short x;
1299  short y;
1300  short z;
1301  short w;
1302  };
1303  __m64 data;
1304 } short4;
1305 typedef union {
1306  struct {
1307  short x;
1308  short y;
1309  short z;
1310  };
1311  short data[3];
1312 } short3;
1313 typedef union {
1314  __m128i data;
1315 } short8;
1316 typedef union {
1317  __m128i data[2];
1318 } short16;
1319 
1320 typedef union {
1321  struct {
1322  unsigned short x;
1323  };
1324  unsigned short data;
1325 } ushort1;
1326 typedef union {
1327  struct {
1328  unsigned short x;
1329  unsigned short y;
1330  };
1331  unsigned short data[2];
1332 } ushort2;
1333 typedef union {
1334  struct {
1335  unsigned short x;
1336  unsigned short y;
1337  unsigned short z;
1338  unsigned short w;
1339  };
1340  __m64 data;
1341 } ushort4;
1342 typedef union {
1343  struct {
1344  unsigned short x;
1345  unsigned short y;
1346  unsigned short z;
1347  };
1348  unsigned short data[3];
1349 } ushort3;
1350 typedef union {
1351  __m128i data;
1352 } ushort8;
1353 typedef union {
1354  __m128i data[2];
1355 } ushort16;
1356 
1357 typedef union {
1358  struct {
1359  int x;
1360  };
1361  int data;
1362 } int1;
1363 typedef union {
1364  struct {
1365  int x;
1366  int y;
1367  };
1368  __m64 data;
1369 } int2;
1370 typedef union {
1371  struct {
1372  int x;
1373  int y;
1374  int z;
1375  int w;
1376  };
1377  __m128i data;
1378 } int4;
1379 typedef union {
1380  struct {
1381  int x;
1382  int y;
1383  int z;
1384  };
1385  int data[3];
1386 } int3;
1387 typedef union {
1388  __m128i data[2];
1389 } int8;
1390 typedef union {
1391  __m128i data[4];
1392 } int16;
1393 
1394 typedef union {
1395  struct {
1396  unsigned int x;
1397  };
1398  unsigned int data;
1399 } uint1;
1400 typedef union {
1401  struct {
1402  unsigned int x;
1403  unsigned int y;
1404  };
1405  __m64 data;
1406 } uint2;
1407 typedef union {
1408  struct {
1409  unsigned int x;
1410  unsigned int y;
1411  unsigned int z;
1412  unsigned int w;
1413  };
1414  __m128i data;
1415 } uint4;
1416 typedef union {
1417  struct {
1418  unsigned int x;
1419  unsigned int y;
1420  unsigned int z;
1421  };
1422  unsigned int data[3];
1423 } uint3;
1424 typedef union {
1425  __m128i data[2];
1426 } uint8;
1427 typedef union {
1428  __m128i data[4];
1429 } uint16;
1430 
1431 typedef union {
1432  struct {
1433  int x;
1434  };
1435  int data;
1436 } long1;
1437 typedef union {
1438  struct {
1439  int x;
1440  int y;
1441  };
1442  __m64 data;
1443 } long2;
1444 typedef union {
1445  struct {
1446  int x;
1447  int y;
1448  int z;
1449  int w;
1450  };
1451  __m128i data;
1452 } long4;
1453 typedef union {
1454  struct {
1455  int x;
1456  int y;
1457  int z;
1458  };
1459  int data[3];
1460 } long3;
1461 typedef union {
1462  __m128i data[2];
1463 } long8;
1464 typedef union {
1465  __m128i data[4];
1466 } long16;
1467 
1468 typedef union {
1469  struct {
1470  unsigned int x;
1471  };
1472  unsigned int data;
1473 } ulong1;
1474 typedef union {
1475  struct {
1476  unsigned int x;
1477  unsigned int y;
1478  };
1479  __m64 data;
1480 } ulong2;
1481 typedef union {
1482  struct {
1483  unsigned int x;
1484  unsigned int y;
1485  unsigned int z;
1486  unsigned int w;
1487  };
1488  __m128i data;
1489 } ulong4;
1490 typedef union {
1491  struct {
1492  unsigned int x;
1493  unsigned int y;
1494  unsigned int z;
1495  };
1496  unsigned int data[3];
1497 } ulong3;
1498 typedef union {
1499  __m128i data[2];
1500 } ulong8;
1501 typedef union {
1502  __m128i data[4];
1503 } ulong16;
1504 
1505 typedef union {
1506  struct {
1507  long long x;
1508  };
1509  __m64 data;
1510 } longlong1;
1511 typedef union {
1512  struct {
1513  long long x;
1514  long long y;
1515  };
1516  __m128i data;
1517 } longlong2;
1518 typedef union {
1519  struct {
1520  long long x;
1521  long long y;
1522  long long z;
1523  long long w;
1524  };
1525  __m128i data[2];
1526 } longlong4;
1527 typedef union {
1528  struct {
1529  long long x;
1530  long long y;
1531  long long z;
1532  };
1533  __m64 data[3];
1534 } longlong3;
1535 typedef union {
1536  __m128i data[4];
1537 } longlong8;
1538 typedef union {
1539  __m128i data[8];
1540 } longlong16;
1541 
1542 typedef union {
1543  struct {
1544  __m64 x;
1545  };
1546  __m64 data;
1547 } ulonglong1;
1548 typedef union {
1549  struct {
1550  __m64 x;
1551  __m64 y;
1552  };
1553  __m128i data;
1554 } ulonglong2;
1555 typedef union {
1556  struct {
1557  __m64 x;
1558  __m64 y;
1559  __m64 z;
1560  __m64 w;
1561  };
1562  __m128i data[2];
1563 } ulonglong4;
1564 typedef union {
1565  struct {
1566  __m64 x;
1567  __m64 y;
1568  __m64 z;
1569  };
1570  __m64 data[3];
1571 } ulonglong3;
1572 typedef union {
1573  __m128i data[4];
1574 } ulonglong8;
1575 typedef union {
1576  __m128i data[8];
1577 } ulonglong16;
1578 
1579 typedef union {
1580  struct {
1581  float x;
1582  };
1583  float data;
1584 } float1;
1585 typedef union {
1586  struct {
1587  float x;
1588  float y;
1589  };
1590  __m64 data;
1591 } float2;
1592 typedef union {
1593  struct {
1594  float x;
1595  float y;
1596  float z;
1597  float w;
1598  };
1599  __m128 data;
1600 } float4;
1601 typedef union {
1602  struct {
1603  float x;
1604  float y;
1605  float z;
1606  };
1607  float data[3];
1608 } float3;
1609 typedef union {
1610  __m256 data;
1611 } float8;
1612 typedef union {
1613  __m256 data[2];
1614 } float16;
1615 
1616 typedef union {
1617  struct {
1618  double x;
1619  };
1620  double data;
1621 } double1;
1622 typedef union {
1623  struct {
1624  double x;
1625  double y;
1626  };
1627  __m128d data;
1628 } double2;
1629 typedef union {
1630  struct {
1631  double x;
1632  double y;
1633  double z;
1634  double w;
1635  };
1636  __m256d data;
1637 } double4;
1638 typedef union {
1639  struct {
1640  double x;
1641  double y;
1642  double z;
1643  };
1644  double data[3];
1645 } double3;
1646 typedef union {
1647  __m256d data[2];
1648 } double8;
1649 typedef union {
1650  __m256d data[4];
1651 } double16;
1652 
1653 #else // !defined(_MSC_VER)
1654 
1655 /*
1656 this is for compatibility with CUDA as CUDA allows accessing vector components
1657 in C++ program with MSVC
1658 */
1659 typedef union {
1660  struct {
1661  char x;
1662  };
1663  char data;
1664 } char1;
1665 typedef union {
1666  struct {
1667  char x;
1668  char y;
1669  };
1670  char data[2];
1671 } char2;
1672 typedef union {
1673  struct {
1674  char x;
1675  char y;
1676  char z;
1677  char w;
1678  };
1679  char data[4];
1680 } char4;
1681 typedef union {
1682  char data[8];
1683 } char8;
1684 typedef union {
1685  char data[16];
1686 } char16;
1687 typedef union {
1688  struct {
1689  char x;
1690  char y;
1691  char z;
1692  };
1693  char data[3];
1694 } char3;
1695 
1696 typedef union {
1697  struct {
1698  unsigned char x;
1699  };
1700  unsigned char data;
1701 } uchar1;
1702 typedef union {
1703  struct {
1704  unsigned char x;
1705  unsigned char y;
1706  };
1707  unsigned char data[2];
1708 } uchar2;
1709 typedef union {
1710  struct {
1711  unsigned char x;
1712  unsigned char y;
1713  unsigned char z;
1714  unsigned char w;
1715  };
1716  unsigned char data[4];
1717 } uchar4;
1718 typedef union {
1719  unsigned char data[8];
1720 } uchar8;
1721 typedef union {
1722  unsigned char data[16];
1723 } uchar16;
1724 typedef union {
1725  struct {
1726  unsigned char x;
1727  unsigned char y;
1728  unsigned char z;
1729  };
1730  unsigned char data[3];
1731 } uchar3;
1732 
1733 typedef union {
1734  struct {
1735  short x;
1736  };
1737  short data;
1738 } short1;
1739 typedef union {
1740  struct {
1741  short x;
1742  short y;
1743  };
1744  short data[2];
1745 } short2;
1746 typedef union {
1747  struct {
1748  short x;
1749  short y;
1750  short z;
1751  short w;
1752  };
1753  short data[4];
1754 } short4;
1755 typedef union {
1756  short data[8];
1757 } short8;
1758 typedef union {
1759  short data[16];
1760 } short16;
1761 typedef union {
1762  struct {
1763  short x;
1764  short y;
1765  short z;
1766  };
1767  short data[3];
1768 } short3;
1769 
1770 typedef union {
1771  struct {
1772  unsigned short x;
1773  };
1774  unsigned short data;
1775 } ushort1;
1776 typedef union {
1777  struct {
1778  unsigned short x;
1779  unsigned short y;
1780  };
1781  unsigned short data[2];
1782 } ushort2;
1783 typedef union {
1784  struct {
1785  unsigned short x;
1786  unsigned short y;
1787  unsigned short z;
1788  unsigned short w;
1789  };
1790  unsigned short data[4];
1791 } ushort4;
1792 typedef union {
1793  unsigned short data[8];
1794 } ushort8;
1795 typedef union {
1796  unsigned short data[16];
1797 } ushort16;
1798 typedef union {
1799  struct {
1800  unsigned short x;
1801  unsigned short y;
1802  unsigned short z;
1803  };
1804  unsigned short data[3];
1805 } ushort3;
1806 
1807 typedef union {
1808  struct {
1809  int x;
1810  };
1811  int data;
1812 } int1;
1813 typedef union {
1814  struct {
1815  int x;
1816  int y;
1817  };
1818  int data[2];
1819 } int2;
1820 typedef union {
1821  struct {
1822  int x;
1823  int y;
1824  int z;
1825  int w;
1826  };
1827  int data[4];
1828 } int4;
1829 typedef union {
1830  int data[8];
1831 } int8;
1832 typedef union {
1833  int data[16];
1834 } int16;
1835 typedef union {
1836  struct {
1837  int x;
1838  int y;
1839  int z;
1840  };
1841  int data[3];
1842 } int3;
1843 
1844 typedef union {
1845  struct {
1846  unsigned int x;
1847  };
1848  unsigned int data;
1849 } uint1;
1850 typedef union {
1851  struct {
1852  unsigned int x;
1853  unsigned int y;
1854  };
1855  unsigned int data[2];
1856 } uint2;
1857 typedef union {
1858  struct {
1859  unsigned int x;
1860  unsigned int y;
1861  unsigned int z;
1862  unsigned int w;
1863  };
1864  unsigned int data[4];
1865 } uint4;
1866 typedef union {
1867  unsigned int data[8];
1868 } uint8;
1869 typedef union {
1870  unsigned int data[16];
1871 } uint16;
1872 typedef union {
1873  struct {
1874  unsigned int x;
1875  unsigned int y;
1876  unsigned int z;
1877  };
1878  unsigned int data[3];
1879 } uint3;
1880 
1881 typedef union {
1882  struct {
1883  long x;
1884  };
1885  long data;
1886 } long1;
1887 typedef union {
1888  struct {
1889  long x;
1890  long y;
1891  };
1892  long data[2];
1893 } long2;
1894 typedef union {
1895  struct {
1896  long x;
1897  long y;
1898  long z;
1899  long w;
1900  };
1901  long data[4];
1902 } long4;
1903 typedef union {
1904  long data[8];
1905 } long8;
1906 typedef union {
1907  long data[16];
1908 } long16;
1909 typedef union {
1910  struct {
1911  long x;
1912  long y;
1913  long z;
1914  };
1915  long data[3];
1916 } long3;
1917 
1918 typedef union {
1919  struct {
1920  unsigned long x;
1921  };
1922  unsigned long data;
1923 } ulong1;
1924 typedef union {
1925  struct {
1926  unsigned long x;
1927  unsigned long y;
1928  };
1929  unsigned long data[2];
1930 } ulong2;
1931 typedef union {
1932  struct {
1933  unsigned long x;
1934  unsigned long y;
1935  unsigned long z;
1936  unsigned long w;
1937  };
1938  unsigned long data[4];
1939 } ulong4;
1940 typedef union {
1941  unsigned long data[8];
1942 } ulong8;
1943 typedef union {
1944  unsigned long data[16];
1945 } ulong16;
1946 typedef union {
1947  struct {
1948  unsigned long x;
1949  unsigned long y;
1950  unsigned long z;
1951  };
1952  unsigned long data[3];
1953 } ulong3;
1954 
1955 typedef union {
1956  struct {
1957  long long x;
1958  };
1959  long long data;
1960 } longlong1;
1961 typedef union {
1962  struct {
1963  long long x;
1964  long long y;
1965  };
1966  long long data[2];
1967 } longlong2;
1968 typedef union {
1969  struct {
1970  long long x;
1971  long long y;
1972  long long z;
1973  long long w;
1974  };
1975  long long data[4];
1976 } longlong4;
1977 typedef union {
1978  long long data[8];
1979 } longlong8;
1980 typedef union {
1981  long long data[16];
1982 } longlong16;
1983 typedef union {
1984  struct {
1985  long long x;
1986  long long y;
1987  long long z;
1988  };
1989  long long data[3];
1990 } longlong3;
1991 
1992 typedef union {
1993  struct {
1994  unsigned long long x;
1995  };
1996  unsigned long long data;
1997 } ulonglong1;
1998 typedef union {
1999  struct {
2000  unsigned long long x;
2001  unsigned long long y;
2002  };
2003  unsigned long long data[2];
2004 } ulonglong2;
2005 typedef union {
2006  struct {
2007  unsigned long long x;
2008  unsigned long long y;
2009  unsigned long long z;
2010  unsigned long long w;
2011  };
2012  unsigned long long data[4];
2013 } ulonglong4;
2014 typedef union {
2015  unsigned long long data[8];
2016 } ulonglong8;
2017 typedef union {
2018  unsigned long long data[16];
2019 } ulonglong16;
2020 typedef union {
2021  struct {
2022  unsigned long long x;
2023  unsigned long long y;
2024  unsigned long long z;
2025  };
2026  unsigned long long data[3];
2027 } ulonglong3;
2028 
2029 typedef union {
2030  struct {
2031  float x;
2032  };
2033  float data;
2034 } float1;
2035 typedef union {
2036  struct {
2037  float x;
2038  float y;
2039  };
2040  float data[2];
2041 } float2;
2042 typedef union {
2043  struct {
2044  float x;
2045  float y;
2046  float z;
2047  float w;
2048  };
2049  float data[4];
2050 } float4;
2051 typedef union {
2052  float data[8];
2053 } float8;
2054 typedef union {
2055  float data[16];
2056 } float16;
2057 typedef union {
2058  struct {
2059  float x;
2060  float y;
2061  float z;
2062  };
2063  float data[3];
2064 } float3;
2065 
2066 typedef union {
2067  struct {
2068  double x;
2069  };
2070  double data;
2071 } double1;
2072 typedef union {
2073  struct {
2074  double x;
2075  double y;
2076  };
2077  double data[2];
2078 } double2;
2079 typedef union {
2080  struct {
2081  double x;
2082  double y;
2083  double z;
2084  double w;
2085  };
2086  double data[4];
2087 } double4;
2088 typedef union {
2089  double data[8];
2090 } double8;
2091 typedef union {
2092  double data[16];
2093 } double16;
2094 typedef union {
2095  struct {
2096  double x;
2097  double y;
2098  double z;
2099  };
2100  double data[3];
2101 } double3;
2102 
2103 #endif // defined(_MSC_VER)
2104 #endif // defined(__has_attribute)
2105 
2106 #ifdef __cplusplus
2107 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2108  static inline __HOST_DEVICE__ type make_##type(comp x) { \
2109  type r{x}; \
2110  return r; \
2111  }
2112 
2113 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2114  static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2115  type r{x, y}; \
2116  return r; \
2117  }
2118 
2119 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2120  static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2121  type r{x, y, z}; \
2122  return r; \
2123  }
2124 
2125 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2126  static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2127  type r{x, y, z, w}; \
2128  return r; \
2129  }
2130 #else
2131 #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2132  static inline __HOST_DEVICE__ type make_##type(comp x) { \
2133  type r; \
2134  r.x = x; \
2135  return r; \
2136  }
2137 
2138 #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2139  static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2140  type r; \
2141  r.x = x; \
2142  r.y = y; \
2143  return r; \
2144  }
2145 
2146 #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2147  static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2148  type r; \
2149  r.x = x; \
2150  r.y = y; \
2151  r.z = z; \
2152  return r; \
2153  }
2154 
2155 #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2156  static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2157  type r; \
2158  r.x = x; \
2159  r.y = y; \
2160  r.z = z; \
2161  r.w = w; \
2162  return r; \
2163  }
2164 #endif
2165 
2166 DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
2167 DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
2168 DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
2169 DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
2170 
2171 DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
2172 DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
2173 DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
2174 DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
2175 
2176 DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
2177 DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
2178 DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
2179 DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
2180 
2181 DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
2182 DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
2183 DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
2184 DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
2185 
2186 DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
2187 DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
2188 DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
2189 DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
2190 
2191 DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
2192 DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
2193 DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
2194 DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
2195 
2196 DECLOP_MAKE_ONE_COMPONENT(float, float1);
2197 DECLOP_MAKE_TWO_COMPONENT(float, float2);
2198 DECLOP_MAKE_THREE_COMPONENT(float, float3);
2199 DECLOP_MAKE_FOUR_COMPONENT(float, float4);
2200 
2201 DECLOP_MAKE_ONE_COMPONENT(double, double1);
2202 DECLOP_MAKE_TWO_COMPONENT(double, double2);
2203 DECLOP_MAKE_THREE_COMPONENT(double, double3);
2204 DECLOP_MAKE_FOUR_COMPONENT(double, double4);
2205 
2206 DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
2207 DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
2208 DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
2209 DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
2210 
2211 DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
2212 DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
2213 DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
2214 DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
2215 
2216 DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
2217 DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
2218 DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
2219 DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
2220 
2221 DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
2222 DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
2223 DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
2224 DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
2225 
2226 #endif
Definition: amd_hip_vector_types.h:1659
Definition: amd_hip_vector_types.h:1665
Definition: amd_hip_vector_types.h:1672
Definition: amd_hip_vector_types.h:1681
Definition: amd_hip_vector_types.h:1684
Definition: amd_hip_vector_types.h:1687
Definition: amd_hip_vector_types.h:1696
Definition: amd_hip_vector_types.h:1702
Definition: amd_hip_vector_types.h:1709
Definition: amd_hip_vector_types.h:1718
Definition: amd_hip_vector_types.h:1721
Definition: amd_hip_vector_types.h:1724
Definition: amd_hip_vector_types.h:1733
Definition: amd_hip_vector_types.h:1739
Definition: amd_hip_vector_types.h:1746
Definition: amd_hip_vector_types.h:1755
Definition: amd_hip_vector_types.h:1758
Definition: amd_hip_vector_types.h:1761
Definition: amd_hip_vector_types.h:1770
Definition: amd_hip_vector_types.h:1776
Definition: amd_hip_vector_types.h:1783
Definition: amd_hip_vector_types.h:1792
Definition: amd_hip_vector_types.h:1795
Definition: amd_hip_vector_types.h:1798
Definition: amd_hip_vector_types.h:1807
Definition: amd_hip_vector_types.h:1813
Definition: amd_hip_vector_types.h:1820
Definition: amd_hip_vector_types.h:1829
Definition: amd_hip_vector_types.h:1832
Definition: amd_hip_vector_types.h:1835
Definition: amd_hip_vector_types.h:1844
Definition: amd_hip_vector_types.h:1850
Definition: amd_hip_vector_types.h:1857
Definition: amd_hip_vector_types.h:1866
Definition: amd_hip_vector_types.h:1869
Definition: amd_hip_vector_types.h:1872
Definition: amd_hip_vector_types.h:1881
Definition: amd_hip_vector_types.h:1887
Definition: amd_hip_vector_types.h:1894
Definition: amd_hip_vector_types.h:1903
Definition: amd_hip_vector_types.h:1906
Definition: amd_hip_vector_types.h:1909
Definition: amd_hip_vector_types.h:1918
Definition: amd_hip_vector_types.h:1924
Definition: amd_hip_vector_types.h:1931
Definition: amd_hip_vector_types.h:1940
Definition: amd_hip_vector_types.h:1943
Definition: amd_hip_vector_types.h:1946
Definition: amd_hip_vector_types.h:1955
Definition: amd_hip_vector_types.h:1961
Definition: amd_hip_vector_types.h:1968
Definition: amd_hip_vector_types.h:1977
Definition: amd_hip_vector_types.h:1980
Definition: amd_hip_vector_types.h:1983
Definition: amd_hip_vector_types.h:1992
Definition: amd_hip_vector_types.h:1998
Definition: amd_hip_vector_types.h:2005
Definition: amd_hip_vector_types.h:2014
Definition: amd_hip_vector_types.h:2017
Definition: amd_hip_vector_types.h:2020
Definition: amd_hip_vector_types.h:2029
Definition: amd_hip_vector_types.h:2035
Definition: amd_hip_vector_types.h:2042
Definition: amd_hip_vector_types.h:2051
Definition: amd_hip_vector_types.h:2054
Definition: amd_hip_vector_types.h:2057
Definition: amd_hip_vector_types.h:2066
Definition: amd_hip_vector_types.h:2072
Definition: amd_hip_vector_types.h:2079
Definition: amd_hip_vector_types.h:2088
Definition: amd_hip_vector_types.h:2091
Definition: amd_hip_vector_types.h:2094