25#if !defined(__HIPCC_RTC__)
26#include "amd_device_functions.h"
29#if __has_builtin(__hip_atomic_compare_exchange_strong)
// Minimal compile-time type selector (equivalent to std::conditional).
// Defined locally so the header works under hipRTC, where the C++ standard
// library (<type_traits>) may be unavailable.  Cond_t<B, T, F>::type is T
// when B is true, F otherwise.
template<bool B, typename T, typename F> struct Cond_t;

template<typename T, typename F> struct Cond_t<true, T, F>  { using type = T; };
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };
36#if !__HIP_DEVICE_COMPILE__
38#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
39#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
40#define __HIP_MEMORY_SCOPE_WORKGROUP 3
41#define __HIP_MEMORY_SCOPE_AGENT 4
42#define __HIP_MEMORY_SCOPE_SYSTEM 5
45#if !defined(__HIPCC_RTC__)
46#include "amd_hip_unsafe_atomics.h"
// CAS-loop expander used as a gfx941 hardware workaround for RMW atomics.
// If *p resides in LDS (shared) memory, defers to `f`, the caller-supplied
// native fast path.  Otherwise emulates the read-modify-write `op` with an
// atomic compare-exchange loop over a same-sized unsigned-integer view of
// *p (so the CAS compares raw bit patterns, which also works for FP types).
// Returns the value *p held immediately before the successful update.
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Op,
    typename F>
__attribute__((always_inline, device))
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
{
    // Generic (flat) address-space pointer type expected by the intrinsic.
    using FP = __attribute__((address_space(0))) const void*;

    // Direct declaration of the LLVM intrinsic that tests whether a flat
    // pointer aliases LDS; no user-facing builtin exposes it here.
    extern
    bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();

    // Same-sized unsigned integer type used as the CAS transport type.
    using U = typename Cond_t<sizeof(T) == sizeof(unsigned int),
                              unsigned int,
                              unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
    U tmp1;
    do {
        tmp1 = tmp0;

        // Apply the user operation to a T-typed view of the snapshot.
        op(reinterpret_cast<T&>(tmp1), x);
    } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
                                                   mem_order, mem_scope));

    return reinterpret_cast<const T&>(tmp0);
}
// CAS-loop expander for extrema (min/max) atomics, gfx941 workaround.
// If *p is in LDS (shared) memory, defers to `f`, the native fast path.
// Otherwise: while `cmp(x, current)` says x improves on the current value,
// attempt to install x with compare-exchange; a failed CAS refreshes `tmp`
// with the freshly observed value.  Returns the final observed old value.
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Cmp,
    typename F>
__attribute__((always_inline, device))
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
{
    // Generic (flat) address-space pointer type expected by the intrinsic.
    using FP = __attribute__((address_space(0))) const void*;

    extern
    bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();

    // Same-sized unsigned integer type used as the CAS transport type.
    using U = typename Cond_t<sizeof(T) == sizeof(unsigned int),
                              unsigned int,
                              unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
    while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
           !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
                                                 mem_scope));

    return reinterpret_cast<const T&>(tmp);
}
// atomicCAS overloads: atomically compare *address with `compare` and, on
// match, store `val`.  Relaxed memory ordering throughout; plain variants
// use agent (device) scope, *_system variants system scope (host + all
// devices).  The builtin leaves the observed old value in `compare`, which
// each overload returns — the `return compare;` is essential (without it
// the function falls off the end, which is undefined behavior).
__device__
inline
int atomicCAS(int* address, int compare, int val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
int atomicCAS_system(int* address, int compare, int val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}

__device__
inline
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}

__device__
inline
unsigned long atomicCAS(unsigned long* address, unsigned long compare, unsigned long val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, unsigned long val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}

__device__
inline
unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
                             unsigned long long val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
                                    unsigned long long val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}

__device__
inline
float atomicCAS(float* address, float compare, float val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
float atomicCAS_system(float* address, float compare, float val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}

__device__
inline
double atomicCAS(double* address, double compare, double val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    return compare;
}

__device__
inline
double atomicCAS_system(double* address, double compare, double val) {
    __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    return compare;
}
// Integer atomicAdd overloads: atomically add `val` to *address and return
// the pre-existing value.  Relaxed ordering; agent (device) scope for the
// plain variants, system scope for *_system.
__device__
inline
int atomicAdd(int* address, int val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicAdd_system(int* address, int val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicAdd(unsigned long* address, unsigned long val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// float atomicAdd: returns the pre-existing value.  When the translation
// unit opts into unsafe FP atomics, uses the hardware FP atomic (which may
// differ in denormal/NaN handling — see amd_hip_unsafe_atomics.h);
// otherwise falls back to the generic relaxed fetch-add at agent scope.
__device__
inline
float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, val);
#else
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

// System-scope float atomicAdd; no unsafe fast path at system scope.
__device__
inline
float atomicAdd_system(float* address, float val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// Deprecated no-return float add: fire-and-forget atomic add via the OCKL
// device library.  Prefer atomicAdd, which also returns the old value.
// The DEPRECATED macro is unavailable under hipRTC, hence the guard.
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
// double atomicAdd: returns the pre-existing value.  Unsafe hardware FP
// atomic when opted in, generic relaxed fetch-add (agent scope) otherwise.
__device__
inline
double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, val);
#else
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

// System-scope double atomicAdd; no unsafe fast path at system scope.
__device__
inline
double atomicAdd_system(double* address, double val) {
    return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// atomicSub overloads: implemented as an atomic fetch-add of the negated
// operand (relaxed ordering; agent scope, or system scope for *_system).
// Each returns the pre-existing value.  For unsigned operands, -val relies
// on well-defined modular (two's-complement) wraparound.
__device__
inline
int atomicSub(int* address, int val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicSub_system(int* address, int val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicSub(unsigned long* address, unsigned long val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicSub(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicSub_system(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

// float subtraction: add of -val; unsafe hardware FP atomic when opted in.
__device__
inline
float atomicSub(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, -val);
#else
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
float atomicSub_system(float* address, float val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

// double subtraction: add of -val; unsafe hardware FP atomic when opted in.
__device__
inline
double atomicSub(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
    return unsafeAtomicAdd(address, -val);
#else
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
double atomicSub_system(double* address, double val) {
    return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// atomicExch overloads: atomically store `val` and return the pre-existing
// value.  Relaxed ordering; agent scope, or system scope for *_system.
__device__
inline
int atomicExch(int* address, int val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicExch_system(int* address, int val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicExch(unsigned long* address, unsigned long val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicExch(float* address, float val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
float atomicExch_system(float* address, float val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
double atomicExch(double* address, double val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
double atomicExch_system(double* address, double val) {
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
// Integer atomicMin overloads: atomically store min(*address, val) and
// return the pre-existing value.  Relaxed ordering; agent scope for plain
// variants, system scope for *_system.  On gfx941 the native fetch_min is
// routed through hip_cas_extrema_expander (CAS-loop hardware workaround),
// with the native builtin supplied as the shared-memory fast path.
// NOTE(review): the garbled source declared the unsigned long overload as
// returning unsigned long long; corrected here to match its operands and
// its *_system sibling.
__device__
inline
int atomicMin(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int x, int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
int atomicMin_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int x, int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicMin(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long x, unsigned long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long x, unsigned long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long x, unsigned long long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long x, unsigned long long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
long long atomicMin(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
long long atomicMin_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// float atomicMin via compare-exchange loop; returns the pre-existing
// value.  A value-based compare cannot distinguish -0.0f from +0.0f, so a
// bit-pattern check (neg_zero) forces the store when val is -0.0f and the
// current value is a zero.
__device__
inline
float atomicMin(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  // Inspect the raw bits of val to detect negative zero.
  typedef union u_hold {
    float a;
    unsigned int b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x80000000U == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value > val || (neg_zero && value == 0.0f))) {
    // On failure the CAS refreshes `value` with the freshly observed value.
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && (__uint_as_float(value) > val ||
                   (neg_zero && __uint_as_float(value) == 0.0f))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}
// System-scope float atomicMin: snapshot the current bits, then loop with
// atomicCAS_system while val is still smaller; returns the final observed
// value (the pre-existing value once the store succeeds or val no longer
// improves on it).
__device__
inline
float atomicMin_system(float* address, float val) {
  unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp{__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
// double atomicMin via compare-exchange loop; returns the pre-existing
// value.  As with the float overload, a bit-pattern check handles the
// -0.0 vs +0.0 case that a value compare cannot see.
__device__
inline
double atomicMin(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  // Inspect the raw bits of val to detect negative zero.
  typedef union u_hold {
    double a;
    unsigned long long b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x8000000000000000ULL == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value > val || (neg_zero && value == 0.0))) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done &&
         (__longlong_as_double(value) > val ||
          (neg_zero && __longlong_as_double(value) == 0.0))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}
// System-scope double atomicMin: snapshot, then CAS loop while val still
// improves; returns the final observed value.
__device__
inline
double atomicMin_system(double* address, double val) {
  unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp{__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
// Integer atomicMax overloads: atomically store max(*address, val) and
// return the pre-existing value.  Mirror images of the atomicMin family
// (comparator `y < x`, i.e. val improves when greater); gfx941 routes
// through hip_cas_extrema_expander as a hardware workaround.
__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int x, int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int x, int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long x, unsigned long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long x, unsigned long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long x, unsigned long long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long x, unsigned long long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
long long atomicMax(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
long long atomicMax_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// float atomicMax via compare-exchange loop; returns the pre-existing
// value.  A bit-pattern check (neg_zero detecting -0.0f in val... here the
// union holds val, so neg_zero is true when val is -0.0f) forces handling
// of the zero case the value compare cannot distinguish.
__device__
inline
float atomicMax(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  // Inspect the raw bits of val to detect negative zero.
  typedef union u_hold {
    float a;
    unsigned int b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x80000000U == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value < val || (neg_zero && value == 0.0f))) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && (__uint_as_float(value) < val ||
                   (neg_zero && __uint_as_float(value) == 0.0f))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}
// System-scope float atomicMax: snapshot, then CAS loop while val still
// improves (is greater); returns the final observed value.
__device__
inline
float atomicMax_system(float* address, float val) {
  unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp{__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
// double atomicMax via compare-exchange loop; returns the pre-existing
// value.  Bit-pattern check handles the -0.0 vs +0.0 case.
__device__
inline
double atomicMax(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  // Inspect the raw bits of val to detect negative zero.
  typedef union u_hold {
    double a;
    unsigned long long b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x8000000000000000ULL == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value < val || (neg_zero && value == 0.0))) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done &&
         (__longlong_as_double(value) < val ||
          (neg_zero && __longlong_as_double(value) == 0.0))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}
// System-scope double atomicMax: snapshot, then CAS loop while val still
// improves; returns the final observed value.
__device__
inline
double atomicMax_system(double* address, double val) {
  unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp{__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
// atomicInc: atomically compute ((old >= val) ? 0 : (old + 1)), store it,
// and return the pre-existing value (CUDA-compatible wrapping increment).
// gfx941 routes through the CAS-loop expander; otherwise the AMDGCN
// inc32 builtin is used directly at agent scope.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
      [=]() {
        return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif
}
// atomicDec: atomically compute ((old == 0 || old > val) ? val : (old - 1)),
// store it, and return the pre-existing value (CUDA-compatible wrapping
// decrement).  gfx941 routes through the CAS-loop expander; otherwise the
// AMDGCN dec32 builtin is used directly at agent scope.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
      [=]() {
        return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif
}
// atomicAnd overloads: atomically AND `val` into *address and return the
// pre-existing value.  Relaxed ordering; agent scope for plain variants,
// system scope for *_system.  gfx941 routes through hip_cas_expander with
// the native fetch_and builtin as the shared-memory fast path.
__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
// atomicOr overloads: atomically OR `val` into *address and return the
// pre-existing value.  Same structure as the atomicAnd family: relaxed
// ordering, agent/system scope, gfx941 CAS-loop workaround.
__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}

__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val,
      [](unsigned long long& x, unsigned long long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif
}
1247int atomicXor(
int* address,
int val) {
1248#if defined(__gfx941__)
1249 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1250 address, val, [](
int& x,
int y) { x ^= y; }, [=]() {
1251 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1252 __HIP_MEMORY_SCOPE_AGENT);
1255 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1261int atomicXor_system(
int* address,
int val) {
1262#if defined(__gfx941__)
1263 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1264 address, val, [](
int& x,
int y) { x ^= y; }, [=]() {
1265 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1266 __HIP_MEMORY_SCOPE_SYSTEM);
1269 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1275unsigned int atomicXor(
unsigned int* address,
unsigned int val) {
1276#if defined(__gfx941__)
1277 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1278 address, val, [](
unsigned int& x,
unsigned int y) { x ^= y; }, [=]() {
1279 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1280 __HIP_MEMORY_SCOPE_AGENT);
1283 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1289unsigned int atomicXor_system(
unsigned int* address,
unsigned int val) {
1290#if defined(__gfx941__)
1291 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1292 address, val, [](
unsigned int& x,
unsigned int y) { x ^= y; }, [=]() {
1293 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1294 __HIP_MEMORY_SCOPE_SYSTEM);
1297 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1303unsigned long atomicXor(
unsigned long* address,
unsigned long val) {
1304#if defined(__gfx941__)
1305 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1306 address, val, [](
unsigned long& x,
unsigned long y) { x ^= y; }, [=]() {
1307 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1308 __HIP_MEMORY_SCOPE_AGENT);
1311 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1317unsigned long atomicXor_system(
unsigned long* address,
unsigned long val) {
1318#if defined(__gfx941__)
1319 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1320 address, val, [](
unsigned long& x,
unsigned long y) { x ^= y; }, [=]() {
1321 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1322 __HIP_MEMORY_SCOPE_SYSTEM);
1325 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1331unsigned long long atomicXor(
unsigned long long* address,
unsigned long long val) {
1332#if defined(__gfx941__)
1333 return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1336 [](
unsigned long long& x,
unsigned long long y) { x ^= y; },
1338 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1339 __HIP_MEMORY_SCOPE_AGENT);
1342 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1348unsigned long long atomicXor_system(
unsigned long long* address,
unsigned long long val) {
1349 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1356int atomicCAS(
int* address,
int compare,
int val)
1358 __atomic_compare_exchange_n(
1359 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1365unsigned int atomicCAS(
1366 unsigned int* address,
unsigned int compare,
unsigned int val)
1368 __atomic_compare_exchange_n(
1369 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1375unsigned long long atomicCAS(
1376 unsigned long long* address,
1377 unsigned long long compare,
1378 unsigned long long val)
1380 __atomic_compare_exchange_n(
1381 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1388int atomicAdd(
int* address,
int val)
1390 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1394unsigned int atomicAdd(
unsigned int* address,
unsigned int val)
1396 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1400unsigned long long atomicAdd(
1401 unsigned long long* address,
unsigned long long val)
1403 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1407float atomicAdd(
float* address,
float val)
1409#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1410 return unsafeAtomicAdd(address, val);
1412 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1416#if !defined(__HIPCC_RTC__)
1417DEPRECATED(
"use atomicAdd instead")
1421void atomicAddNoRet(
float* address,
float val)
1423 __ockl_atomic_add_noret_f32(address, val);
1428double atomicAdd(
double* address,
double val)
1430#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1431 return unsafeAtomicAdd(address, val);
1433 return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1439int atomicSub(
int* address,
int val)
1441 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1445unsigned int atomicSub(
unsigned int* address,
unsigned int val)
1447 return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1452int atomicExch(
int* address,
int val)
1454 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1458unsigned int atomicExch(
unsigned int* address,
unsigned int val)
1460 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1464unsigned long long atomicExch(
unsigned long long* address,
unsigned long long val)
1466 return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1470float atomicExch(
float* address,
float val)
1472 return __uint_as_float(__atomic_exchange_n(
1473 reinterpret_cast<unsigned int*
>(address),
1474 __float_as_uint(val),
1480int atomicMin(
int* address,
int val)
1482 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1486unsigned int atomicMin(
unsigned int* address,
unsigned int val)
1488 return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1492unsigned long long atomicMin(
1493 unsigned long long* address,
unsigned long long val)
1495 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1497 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1499 if (tmp1 != tmp) { tmp = tmp1;
continue; }
1501 tmp = atomicCAS(address, tmp, val);
1506__device__
inline long long atomicMin(
long long* address,
long long val) {
1507 long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1509 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1516 tmp = atomicCAS(address, tmp, val);
1523int atomicMax(
int* address,
int val)
1525 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1529unsigned int atomicMax(
unsigned int* address,
unsigned int val)
1531 return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1535unsigned long long atomicMax(
1536 unsigned long long* address,
unsigned long long val)
1538 unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1540 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1542 if (tmp1 != tmp) { tmp = tmp1;
continue; }
1544 tmp = atomicCAS(address, tmp, val);
1549__device__
inline long long atomicMax(
long long* address,
long long val) {
1550 long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1552 const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1559 tmp = atomicCAS(address, tmp, val);
1566unsigned int atomicInc(
unsigned int* address,
unsigned int val)
1568 return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED,
"agent");
1573unsigned int atomicDec(
unsigned int* address,
unsigned int val)
1575 return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED,
"agent");
1580int atomicAnd(
int* address,
int val)
1582 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1586unsigned int atomicAnd(
unsigned int* address,
unsigned int val)
1588 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1592unsigned long long atomicAnd(
1593 unsigned long long* address,
unsigned long long val)
1595 return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1600int atomicOr(
int* address,
int val)
1602 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1606unsigned int atomicOr(
unsigned int* address,
unsigned int val)
1608 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1612unsigned long long atomicOr(
1613 unsigned long long* address,
unsigned long long val)
1615 return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1620int atomicXor(
int* address,
int val)
1622 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1626unsigned int atomicXor(
unsigned int* address,
unsigned int val)
1628 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1632unsigned long long atomicXor(
1633 unsigned long long* address,
unsigned long long val)
1635 return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);