25 #if !defined(__HIPCC_RTC__)
26 #include "amd_device_functions.h"
29 #if __has_builtin(__hip_atomic_compare_exchange_strong)
// Minimal reimplementation of std::conditional.  <type_traits> cannot be
// included here because this header must also compile under hipRTC, which
// has no host standard library.
template<bool B, typename T, typename F>
struct Cond_t;

template<typename T, typename F>
struct Cond_t<true, T, F> { using type = T; };

template<typename T, typename F>
struct Cond_t<false, T, F> { using type = F; };
#if !__HIP_DEVICE_COMPILE__
// The device compiler predefines these scope constants.  During the host
// pass they are absent, so mirror the device-side values here purely so the
// host parser accepts the __hip_atomic_* builtin calls below.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
#define __HIP_MEMORY_SCOPE_WORKGROUP 3
#define __HIP_MEMORY_SCOPE_AGENT 4
#define __HIP_MEMORY_SCOPE_SYSTEM 5
#endif
45 #if !defined(__HIPCC_RTC__)
46 #include "amd_hip_unsafe_atomics.h"
51 int mem_order = __ATOMIC_SEQ_CST,
52 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
58 T hip_cas_expander(T* p, T x, Op op, F f) noexcept
63 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
65 if (is_shared_workaround((FP)p))
68 using U = typename Cond_t<
69 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
71 auto q = reinterpret_cast<U*>(p);
73 U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
78 op(
reinterpret_cast<T&
>(tmp1), x);
79 }
while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
80 mem_order, mem_scope));
82 return reinterpret_cast<const T&
>(tmp0);
86 int mem_order = __ATOMIC_SEQ_CST,
87 int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
93 T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
98 extern
bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
100 if (is_shared_workaround((FP)p))
103 using U = typename Cond_t<
104 sizeof(T) == sizeof(
unsigned int),
unsigned int,
unsigned long long>::type;
106 auto q = reinterpret_cast<U*>(p);
108 U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
109 while (cmp(x,
reinterpret_cast<const T&
>(tmp)) &&
110 !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
113 return reinterpret_cast<const T&
>(tmp);
118 int atomicCAS(
int* address,
int compare,
int val) {
119 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
120 __HIP_MEMORY_SCOPE_AGENT);
126 int atomicCAS_system(
int* address,
int compare,
int val) {
127 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
128 __HIP_MEMORY_SCOPE_SYSTEM);
134 unsigned int atomicCAS(
unsigned int* address,
unsigned int compare,
unsigned int val) {
135 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
136 __HIP_MEMORY_SCOPE_AGENT);
142 unsigned int atomicCAS_system(
unsigned int* address,
unsigned int compare,
unsigned int val) {
143 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
144 __HIP_MEMORY_SCOPE_SYSTEM);
150 unsigned long atomicCAS(
unsigned long* address,
unsigned long compare,
unsigned long val) {
151 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
152 __HIP_MEMORY_SCOPE_AGENT);
158 unsigned long atomicCAS_system(
unsigned long* address,
unsigned long compare,
unsigned long val) {
159 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
160 __HIP_MEMORY_SCOPE_SYSTEM);
166 unsigned long long atomicCAS(
unsigned long long* address,
unsigned long long compare,
167 unsigned long long val) {
168 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
169 __HIP_MEMORY_SCOPE_AGENT);
175 unsigned long long atomicCAS_system(
unsigned long long* address,
unsigned long long compare,
176 unsigned long long val) {
177 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
178 __HIP_MEMORY_SCOPE_SYSTEM);
184 float atomicCAS(
float* address,
float compare,
float val) {
185 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
186 __HIP_MEMORY_SCOPE_AGENT);
192 float atomicCAS_system(
float* address,
float compare,
float val) {
193 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
194 __HIP_MEMORY_SCOPE_SYSTEM);
200 double atomicCAS(
double* address,
double compare,
double val) {
201 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
202 __HIP_MEMORY_SCOPE_AGENT);
208 double atomicCAS_system(
double* address,
double compare,
double val) {
209 __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
210 __HIP_MEMORY_SCOPE_SYSTEM);
216 int atomicAdd(
int* address,
int val) {
217 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
222 int atomicAdd_system(
int* address,
int val) {
223 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
228 unsigned int atomicAdd(
unsigned int* address,
unsigned int val) {
229 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
234 unsigned int atomicAdd_system(
unsigned int* address,
unsigned int val) {
235 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
240 unsigned long atomicAdd(
unsigned long* address,
unsigned long val) {
241 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
246 unsigned long atomicAdd_system(
unsigned long* address,
unsigned long val) {
247 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
252 unsigned long long atomicAdd(
unsigned long long* address,
unsigned long long val) {
253 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
258 unsigned long long atomicAdd_system(
unsigned long long* address,
unsigned long long val) {
259 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
264 float atomicAdd(
float* address,
float val) {
265 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
266 return unsafeAtomicAdd(address, val);
268 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
274 float atomicAdd_system(
float* address,
float val) {
275 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
278 #if !defined(__HIPCC_RTC__)
279 DEPRECATED(
"use atomicAdd instead")
283 void atomicAddNoRet(
float* address,
float val)
285 __ockl_atomic_add_noret_f32(address, val);
290 double atomicAdd(
double* address,
double val) {
291 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
292 return unsafeAtomicAdd(address, val);
294 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
300 double atomicAdd_system(
double* address,
double val) {
301 return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
306 int atomicSub(
int* address,
int val) {
307 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
312 int atomicSub_system(
int* address,
int val) {
313 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
318 unsigned int atomicSub(
unsigned int* address,
unsigned int val) {
319 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
324 unsigned int atomicSub_system(
unsigned int* address,
unsigned int val) {
325 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
330 unsigned long atomicSub(
unsigned long* address,
unsigned long val) {
331 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
336 unsigned long atomicSub_system(
unsigned long* address,
unsigned long val) {
337 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
342 unsigned long long atomicSub(
unsigned long long* address,
unsigned long long val) {
343 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
348 unsigned long long atomicSub_system(
unsigned long long* address,
unsigned long long val) {
349 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
354 float atomicSub(
float* address,
float val) {
355 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
356 return unsafeAtomicAdd(address, -val);
358 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
364 float atomicSub_system(
float* address,
float val) {
365 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
370 double atomicSub(
double* address,
double val) {
371 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
372 return unsafeAtomicAdd(address, -val);
374 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
380 double atomicSub_system(
double* address,
double val) {
381 return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
386 int atomicExch(
int* address,
int val) {
387 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
392 int atomicExch_system(
int* address,
int val) {
393 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
398 unsigned int atomicExch(
unsigned int* address,
unsigned int val) {
399 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
404 unsigned int atomicExch_system(
unsigned int* address,
unsigned int val) {
405 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
410 unsigned long atomicExch(
unsigned long* address,
unsigned long val) {
411 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
416 unsigned long atomicExch_system(
unsigned long* address,
unsigned long val) {
417 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
422 unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val) {
423 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
428 unsigned long long atomicExch_system(
unsigned long long* address,
unsigned long long val) {
429 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
434 float atomicExch(
float* address,
float val) {
435 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
440 float atomicExch_system(
float* address,
float val) {
441 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
446 double atomicExch(
double* address,
double val) {
447 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
452 double atomicExch_system(
double* address,
double val) {
453 return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
458 int atomicMin(
int* address,
int val) {
459 #if defined(__gfx941__)
460 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
461 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
462 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
463 __HIP_MEMORY_SCOPE_AGENT);
466 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
472 int atomicMin_system(
int* address,
int val) {
473 #if defined(__gfx941__)
474 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
475 address, val, [](
int x,
int y) {
return x < y; }, [=]() {
476 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
477 __HIP_MEMORY_SCOPE_SYSTEM);
480 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
486 unsigned int atomicMin(
unsigned int* address,
unsigned int val) {
487 #if defined(__gfx941__)
488 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
489 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
490 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
491 __HIP_MEMORY_SCOPE_AGENT);
494 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
501 unsigned int atomicMin_system(
unsigned int* address,
unsigned int val) {
502 #if defined(__gfx941__)
503 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
504 address, val, [](
unsigned int x,
unsigned int y) {
return x < y; }, [=]() {
505 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
506 __HIP_MEMORY_SCOPE_SYSTEM);
509 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
515 unsigned long long atomicMin(
unsigned long* address,
unsigned long val) {
516 #if defined(__gfx941__)
517 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
520 [](
unsigned long x,
unsigned long y) {
return x < y; },
522 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
523 __HIP_MEMORY_SCOPE_AGENT);
526 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
532 unsigned long atomicMin_system(
unsigned long* address,
unsigned long val) {
533 #if defined(__gfx941__)
534 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
537 [](
unsigned long x,
unsigned long y) {
return x < y; },
539 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
540 __HIP_MEMORY_SCOPE_SYSTEM);
543 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
549 unsigned long long atomicMin(
unsigned long long* address,
unsigned long long val) {
550 #if defined(__gfx941__)
551 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
554 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
556 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
557 __HIP_MEMORY_SCOPE_AGENT);
560 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
566 unsigned long long atomicMin_system(
unsigned long long* address,
unsigned long long val) {
567 #if defined(__gfx941__)
568 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
571 [](
unsigned long long x,
unsigned long long y) {
return x < y; },
573 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
574 __HIP_MEMORY_SCOPE_SYSTEM);
577 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
583 long long atomicMin(
long long* address,
long long val) {
584 #if defined(__gfx941__)
585 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
586 address, val, [](
long long x,
long long y) {
return x < y; },
588 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
591 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
597 long long atomicMin_system(
long long* address,
long long val) {
598 #if defined(__gfx941__)
599 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
600 address, val, [](
long long x,
long long y) {
return x < y; },
602 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
605 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
611 float atomicMin(
float* addr,
float val) {
612 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613 return unsafeAtomicMin(addr, val);
615 typedef union u_hold {
620 bool neg_zero = 0x80000000U == u.b;
621 #if __has_builtin(__hip_atomic_load) && \
622 __has_builtin(__hip_atomic_compare_exchange_strong)
623 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
625 while (!done && (value > val || (neg_zero && value == 0.0f))) {
626 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
627 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
631 unsigned int *uaddr = (
unsigned int *)addr;
632 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
634 while (!done && (__uint_as_float(value) > val || (neg_zero && __uint_as_float(value) == 0.0f))) {
635 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
636 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
638 return __uint_as_float(value);
645 float atomicMin_system(
float* address,
float val) {
646 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
647 #if __has_builtin(__hip_atomic_load)
648 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
650 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
652 float value = __uint_as_float(tmp);
654 while (val < value) {
655 value = atomicCAS_system(address, value, val);
663 double atomicMin(
double* addr,
double val) {
664 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
665 return unsafeAtomicMin(addr, val);
667 typedef union u_hold {
669 unsigned long long b;
672 bool neg_zero = 0x8000000000000000ULL == u.b;
673 #if __has_builtin(__hip_atomic_load) && \
674 __has_builtin(__hip_atomic_compare_exchange_strong)
675 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
677 while (!done && (value > val || (neg_zero && value == 0.0))) {
678 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
679 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
683 unsigned long long *uaddr = (
unsigned long long *)addr;
684 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
687 (__longlong_as_double(value) > val || (neg_zero && __longlong_as_double(value) == 0.0))) {
688 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
689 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
691 return __longlong_as_double(value);
698 double atomicMin_system(
double* address,
double val) {
699 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
700 #if __has_builtin(__hip_atomic_load)
701 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
703 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
705 double value = __longlong_as_double(tmp);
707 while (val < value) {
708 value = atomicCAS_system(address, value, val);
716 int atomicMax(
int* address,
int val) {
717 #if defined(__gfx941__)
718 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
719 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
720 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
721 __HIP_MEMORY_SCOPE_AGENT);
724 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
730 int atomicMax_system(
int* address,
int val) {
731 #if defined(__gfx941__)
732 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
733 address, val, [](
int x,
int y) {
return y < x; }, [=]() {
734 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
735 __HIP_MEMORY_SCOPE_SYSTEM);
738 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
744 unsigned int atomicMax(
unsigned int* address,
unsigned int val) {
745 #if defined(__gfx941__)
746 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
747 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
748 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
749 __HIP_MEMORY_SCOPE_AGENT);
752 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
758 unsigned int atomicMax_system(
unsigned int* address,
unsigned int val) {
759 #if defined(__gfx941__)
760 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
761 address, val, [](
unsigned int x,
unsigned int y) {
return y < x; }, [=]() {
762 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
763 __HIP_MEMORY_SCOPE_SYSTEM);
766 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
772 unsigned long atomicMax(
unsigned long* address,
unsigned long val) {
773 #if defined(__gfx941__)
774 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
777 [](
unsigned long x,
unsigned long y) {
return y < x; },
779 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
780 __HIP_MEMORY_SCOPE_AGENT);
783 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
789 unsigned long atomicMax_system(
unsigned long* address,
unsigned long val) {
790 #if defined(__gfx941__)
791 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
794 [](
unsigned long x,
unsigned long y) {
return y < x; },
796 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
797 __HIP_MEMORY_SCOPE_SYSTEM);
800 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
806 unsigned long long atomicMax(
unsigned long long* address,
unsigned long long val) {
807 #if defined(__gfx941__)
808 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
811 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
813 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
814 __HIP_MEMORY_SCOPE_AGENT);
817 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
823 unsigned long long atomicMax_system(
unsigned long long* address,
unsigned long long val) {
824 #if defined(__gfx941__)
825 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
828 [](
unsigned long long x,
unsigned long long y) {
return y < x; },
830 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
831 __HIP_MEMORY_SCOPE_SYSTEM);
834 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
840 long long atomicMax(
long long* address,
long long val) {
841 #if defined(__gfx941__)
842 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
843 address, val, [](
long long x,
long long y) {
return y < x; },
845 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
848 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
854 long long atomicMax_system(
long long* address,
long long val) {
855 #if defined(__gfx941__)
856 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
857 address, val, [](
long long x,
long long y) {
return y < x; },
859 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
862 return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
868 float atomicMax(
float* addr,
float val) {
869 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
870 return unsafeAtomicMax(addr, val);
872 typedef union u_hold {
877 bool neg_zero = 0x80000000U == u.b;
878 #if __has_builtin(__hip_atomic_load) && \
879 __has_builtin(__hip_atomic_compare_exchange_strong)
880 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
882 while (!done && (value < val || (neg_zero && value == 0.0f))) {
883 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
884 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
888 unsigned int *uaddr = (
unsigned int *)addr;
889 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
891 while (!done && (__uint_as_float(value) < val || (neg_zero && __uint_as_float(value) == 0.0f))) {
892 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
false,
893 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
895 return __uint_as_float(value);
902 float atomicMax_system(
float* address,
float val) {
903 unsigned int* uaddr {
reinterpret_cast<unsigned int*
>(address) };
904 #if __has_builtin(__hip_atomic_load)
905 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
907 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
909 float value = __uint_as_float(tmp);
911 while (value < val) {
912 value = atomicCAS_system(address, value, val);
920 double atomicMax(
double* addr,
double val) {
921 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
922 return unsafeAtomicMax(addr, val);
924 typedef union u_hold {
926 unsigned long long b;
929 bool neg_zero = 0x8000000000000000ULL == u.b;
930 #if __has_builtin(__hip_atomic_load) && \
931 __has_builtin(__hip_atomic_compare_exchange_strong)
932 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
934 while (!done && (value < val || (neg_zero && value == 0.0))) {
935 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
936 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
940 unsigned long long *uaddr = (
unsigned long long *)addr;
941 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
944 (__longlong_as_double(value) < val || (neg_zero && __longlong_as_double(value) == 0.0))) {
945 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
false,
946 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
948 return __longlong_as_double(value);
955 double atomicMax_system(
double* address,
double val) {
956 unsigned long long* uaddr {
reinterpret_cast<unsigned long long*
>(address) };
957 #if __has_builtin(__hip_atomic_load)
958 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
960 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
962 double value = __longlong_as_double(tmp);
964 while (value < val) {
965 value = atomicCAS_system(address, value, val);
973 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
975 #if defined(__gfx941__)
976 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
979 [](
unsigned int& x,
unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
982 __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
985 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
992 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
994 #if defined(__gfx941__)
995 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
998 [](
unsigned int& x,
unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
1001 __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1004 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1011 int atomicAnd(
int* address,
int val) {
1012 #if defined(__gfx941__)
1013 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1014 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1015 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1016 __HIP_MEMORY_SCOPE_AGENT);
1019 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1025 int atomicAnd_system(
int* address,
int val) {
1026 #if defined(__gfx941__)
1027 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1028 address, val, [](
int& x,
int y) { x &= y; }, [=]() {
1029 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1030 __HIP_MEMORY_SCOPE_SYSTEM);
1033 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1039 unsigned int atomicAnd(
unsigned int* address,
unsigned int val) {
1040 #if defined(__gfx941__)
1041 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1042 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1043 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1044 __HIP_MEMORY_SCOPE_AGENT);
1047 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1053 unsigned int atomicAnd_system(
unsigned int* address,
unsigned int val) {
1054 #if defined(__gfx941__)
1055 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1056 address, val, [](
unsigned int& x,
unsigned int y) { x &= y; }, [=]() {
1057 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1058 __HIP_MEMORY_SCOPE_SYSTEM);
1061 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1067 unsigned long atomicAnd(
unsigned long* address,
unsigned long val) {
1068 #if defined(__gfx941__)
1069 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1070 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1071 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1072 __HIP_MEMORY_SCOPE_AGENT);
1075 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1081 unsigned long atomicAnd_system(
unsigned long* address,
unsigned long val) {
1082 #if defined(__gfx941__)
1083 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1084 address, val, [](
unsigned long& x,
unsigned long y) { x &= y; }, [=]() {
1085 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1086 __HIP_MEMORY_SCOPE_SYSTEM);
1089 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1095 unsigned long long atomicAnd(
unsigned long long* address,
unsigned long long val) {
1096 #if defined(__gfx941__)
1097 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1100 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1102 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1103 __HIP_MEMORY_SCOPE_AGENT);
1106 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1112 unsigned long long atomicAnd_system(
unsigned long long* address,
unsigned long long val) {
1113 #if defined(__gfx941__)
1114 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1117 [](
unsigned long long& x,
unsigned long long y) { x &= y; },
1119 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1120 __HIP_MEMORY_SCOPE_SYSTEM);
1123 return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1129 int atomicOr(
int* address,
int val) {
1130 #if defined(__gfx941__)
1131 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1132 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1133 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1134 __HIP_MEMORY_SCOPE_AGENT);
1137 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1143 int atomicOr_system(
int* address,
int val) {
1144 #if defined(__gfx941__)
1145 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1146 address, val, [](
int& x,
int y) { x |= y; }, [=]() {
1147 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1148 __HIP_MEMORY_SCOPE_SYSTEM);
1151 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1157 unsigned int atomicOr(
unsigned int* address,
unsigned int val) {
1158 #if defined(__gfx941__)
1159 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1160 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1161 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1162 __HIP_MEMORY_SCOPE_AGENT);
1165 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1171 unsigned int atomicOr_system(
unsigned int* address,
unsigned int val) {
1172 #if defined(__gfx941__)
1173 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1174 address, val, [](
unsigned int& x,
unsigned int y) { x |= y; }, [=]() {
1175 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1176 __HIP_MEMORY_SCOPE_SYSTEM);
1179 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1185 unsigned long atomicOr(
unsigned long* address,
unsigned long val) {
1186 #if defined(__gfx941__)
1187 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1188 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1189 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1190 __HIP_MEMORY_SCOPE_AGENT);
1193 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1199 unsigned long atomicOr_system(
unsigned long* address,
unsigned long val) {
1200 #if defined(__gfx941__)
1201 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1202 address, val, [](
unsigned long& x,
unsigned long y) { x |= y; }, [=]() {
1203 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1204 __HIP_MEMORY_SCOPE_SYSTEM);
1207 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1213 unsigned long long atomicOr(
unsigned long long* address,
unsigned long long val) {
1214 #if defined(__gfx941__)
1215 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1218 [](
unsigned long long& x,
unsigned long long y) { x |= y; },
1220 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1221 __HIP_MEMORY_SCOPE_AGENT);
1224 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1230 unsigned long long atomicOr_system(
unsigned long long* address,
unsigned long long val) {
1231 #if defined(__gfx941__)
1232 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1235 [](
unsigned long long& x,
unsigned long long y) { x |= y; },
1237 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1238 __HIP_MEMORY_SCOPE_SYSTEM);
1241 return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1247 int atomicXor(
int* address,
int val) {
1248 #if defined(__gfx941__)
1249 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1250 address, val, [](
int& x,
int y) { x ^= y; }, [=]() {
1251 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1252 __HIP_MEMORY_SCOPE_AGENT);
1255 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1261 int atomicXor_system(
int* address,
int val) {
1262 #if defined(__gfx941__)
1263 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1264 address, val, [](
int& x,
int y) { x ^= y; }, [=]() {
1265 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1266 __HIP_MEMORY_SCOPE_SYSTEM);
1269 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1275 unsigned int atomicXor(
unsigned int* address,
unsigned int val) {
1276 #if defined(__gfx941__)
1277 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1278 address, val, [](
unsigned int& x,
unsigned int y) { x ^= y; }, [=]() {
1279 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1280 __HIP_MEMORY_SCOPE_AGENT);
1283 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1289 unsigned int atomicXor_system(
unsigned int* address,
unsigned int val) {
1290 #if defined(__gfx941__)
1291 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1292 address, val, [](
unsigned int& x,
unsigned int y) { x ^= y; }, [=]() {
1293 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1294 __HIP_MEMORY_SCOPE_SYSTEM);
1297 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1303 unsigned long atomicXor(
unsigned long* address,
unsigned long val) {
1304 #if defined(__gfx941__)
1305 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1306 address, val, [](
unsigned long& x,
unsigned long y) { x ^= y; }, [=]() {
1307 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1308 __HIP_MEMORY_SCOPE_AGENT);
1311 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1317 unsigned long atomicXor_system(
unsigned long* address,
unsigned long val) {
1318 #if defined(__gfx941__)
1319 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1320 address, val, [](
unsigned long& x,
unsigned long y) { x ^= y; }, [=]() {
1321 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1322 __HIP_MEMORY_SCOPE_SYSTEM);
1325 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1331 unsigned long long atomicXor(
unsigned long long* address,
unsigned long long val) {
1332 #if defined(__gfx941__)
1333 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1336 [](
unsigned long long& x,
unsigned long long y) { x ^= y; },
1338 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1339 __HIP_MEMORY_SCOPE_AGENT);
1342 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1348 unsigned long long atomicXor_system(
unsigned long long* address,
unsigned long long val) {
1349 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1356 int atomicCAS(
int* address,
int compare,
int val)
1358 __atomic_compare_exchange_n(
1359 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1365 unsigned int atomicCAS(
1366 unsigned int* address,
unsigned int compare,
unsigned int val)
1368 __atomic_compare_exchange_n(
1369 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1375 unsigned long long atomicCAS(
1376 unsigned long long* address,
1377 unsigned long long compare,
1378 unsigned long long val)
1380 __atomic_compare_exchange_n(
1381 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1388 int atomicAdd(
int* address,
int val)
1390 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1394 unsigned int atomicAdd(
unsigned int* address,
unsigned int val)
1396 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1400 unsigned long long atomicAdd(
1401 unsigned long long* address,
unsigned long long val)
1403 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1407 float atomicAdd(
float* address,
float val)
1409 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1410 return unsafeAtomicAdd(address, val);
1412 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1416 #if !defined(__HIPCC_RTC__)
1417 DEPRECATED(
"use atomicAdd instead")
1421 void atomicAddNoRet(
float* address,
float val)
1423 __ockl_atomic_add_noret_f32(address, val);
1428 double atomicAdd(
double* address,
double val)
1430 #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1431 return unsafeAtomicAdd(address, val);
1433 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1439 int atomicSub(
int* address,
int val)
1441 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1445 unsigned int atomicSub(
unsigned int* address,
unsigned int val)
1447 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1452 int atomicExch(
int* address,
int val)
1454 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1458 unsigned int atomicExch(
unsigned int* address,
unsigned int val)
1460 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1464 unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val)
1466 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1470 float atomicExch(
float* address,
float val)
1472 return __uint_as_float(__atomic_exchange_n(
1473 reinterpret_cast<unsigned int*
>(address),
1474 __float_as_uint(val),
1480 int atomicMin(
int* address,
int val)
1482 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1486 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
1488 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1492 unsigned long long atomicMin(
1493 unsigned long long* address,
unsigned long long val)
1495 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1497 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1499 if (tmp1 != tmp) { tmp = tmp1;
continue; }
1501 tmp = atomicCAS(address, tmp, val);
1506 __device__
inline long long atomicMin(
long long* address,
long long val) {
1507 long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1509 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1516 tmp = atomicCAS(address, tmp, val);
1523 int atomicMax(
int* address,
int val)
1525 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1529 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
1531 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1535 unsigned long long atomicMax(
1536 unsigned long long* address,
unsigned long long val)
1538 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1540 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1542 if (tmp1 != tmp) { tmp = tmp1;
continue; }
1544 tmp = atomicCAS(address, tmp, val);
1549 __device__
inline long long atomicMax(
long long* address,
long long val) {
1550 long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1552 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1559 tmp = atomicCAS(address, tmp, val);
1566 unsigned int atomicInc(
unsigned int* address,
unsigned int val)
1568 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
1573 unsigned int atomicDec(
unsigned int* address,
unsigned int val)
1575 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1580 int atomicAnd(
int* address,
int val)
1582 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1586 unsigned int atomicAnd(
unsigned int* address,
unsigned int val)
1588 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1592 unsigned long long atomicAnd(
1593 unsigned long long* address,
unsigned long long val)
1595 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1600 int atomicOr(
int* address,
int val)
1602 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1606 unsigned int atomicOr(
unsigned int* address,
unsigned int val)
1608 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1612 unsigned long long atomicOr(
1613 unsigned long long* address,
unsigned long long val)
1615 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1620 int atomicXor(
int* address,
int val)
1622 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1626 unsigned int atomicXor(
unsigned int* address,
unsigned int val)
1628 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1632 unsigned long long atomicXor(
1633 unsigned long long* address,
unsigned long long val)
1635 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
// __2f16: _Float16 __attribute__((ext_vector_type(2))) — two-element half
// vector type; declared in hip_fp16_math_fwd.h (line 57).