HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_atomic.h
1/*
2Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#pragma once
24
25#if !defined(__HIPCC_RTC__)
26#include "amd_device_functions.h"
27#endif
28
29#if __has_builtin(__hip_atomic_compare_exchange_strong)
30
// Minimal compile-time conditional type selector (a local stand-in for
// std::conditional, presumably kept header-local so no <type_traits>
// include is needed — TODO confirm): Cond_t<B, T, F>::type is T when B
// is true and F when B is false.
template<bool B, typename T, typename F> struct Cond_t;

template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };

#if !__HIP_DEVICE_COMPILE__
//TODO: Remove this after compiler pre-defines the following Macros.
// Host-compilation fallbacks for the memory-scope constants used by the
// __hip_atomic_* builtins below.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
#define __HIP_MEMORY_SCOPE_WORKGROUP 3
#define __HIP_MEMORY_SCOPE_AGENT 4
#define __HIP_MEMORY_SCOPE_SYSTEM 5
#endif
44
45#if !defined(__HIPCC_RTC__)
46#include "amd_hip_unsafe_atomics.h"
47#endif
48
// Atomic expanders
//
// hip_cas_expander: emulates an arbitrary atomic read-modify-write on *p
// by spinning on __hip_atomic_compare_exchange_strong over an unsigned
// integer view of the same width as T.  If p aliases LDS (shared memory,
// per the llvm.amdgcn.is.shared intrinsic) the caller-supplied fallback
// f() is invoked instead.  Returns the value *p held immediately before
// the successful update (fetch-op semantics).
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Op,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
{
    // Flat (generic address-space) pointer type for the intrinsic below.
    using FP = __attribute__((address_space(0))) const void*;

    // Workaround: bind a function declaration directly to the LLVM
    // intrinsic that tests whether a flat pointer points into LDS.
    __device__
    extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();

    // Choose a CAS-able unsigned integer of matching width (32/64-bit).
    using U = typename Cond_t<
        sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
    U tmp1;
    do {
        tmp1 = tmp0;

        // Apply the user operation on the T-typed view of the loaded bits;
        // on CAS failure tmp0 is refreshed with the observed value.
        op(reinterpret_cast<T&>(tmp1), x);
    } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
                                                   mem_order, mem_scope));

    return reinterpret_cast<const T&>(tmp0);
}
84
// hip_cas_extrema_expander: CAS-loop emulation specialised for min/max
// style updates.  Repeatedly tries to store x while cmp(x, current)
// holds; each failed CAS refreshes `tmp` with the observed value.  As
// above, LDS pointers are diverted to the fallback f().  Returns the
// last value observed at *p (the pre-store value when a store happened,
// otherwise the value that made cmp fail).
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Cmp,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
{
    // Flat (generic address-space) pointer type for the intrinsic below.
    using FP = __attribute__((address_space(0))) const void*;

    // Workaround: direct binding to the LDS-aliasing test intrinsic.
    __device__
    extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();

    // Choose a CAS-able unsigned integer of matching width (32/64-bit).
    using U = typename Cond_t<
        sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
    while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
           !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
                                                 mem_scope));

    return reinterpret_cast<const T&>(tmp);
}
115
// ---------------------------------------------------------------------------
// atomicCAS / atomicCAS_system
//
// CUDA-style compare-and-swap: atomically performs
//   old = *address; if (old == compare) *address = val;
// and returns `old`.  On failure __hip_atomic_compare_exchange_strong
// writes the observed value back into `compare`, so returning `compare`
// yields the pre-operation value whether or not the swap happened; the
// builtin's boolean success result is intentionally discarded.
// Plain overloads act at agent (device) scope, *_system overloads at
// system scope; all use relaxed memory ordering.
// ---------------------------------------------------------------------------
__device__
inline
int atomicCAS(int* address, int compare, int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
int atomicCAS_system(int* address, int compare, int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned long atomicCAS(unsigned long* address, unsigned long compare, unsigned long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, unsigned long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
                             unsigned long long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
                                    unsigned long long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

// NOTE(review): for the float/double overloads the comparison is presumably
// performed bitwise by the underlying integer CAS (so -0.0f vs +0.0f and
// NaN payloads compare by representation, not by IEEE ==) — confirm.
__device__
inline
float atomicCAS(float* address, float compare, float val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
float atomicCAS_system(float* address, float compare, float val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
double atomicCAS(double* address, double compare, double val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
double atomicCAS_system(double* address, double compare, double val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}
213
// ---------------------------------------------------------------------------
// atomicAdd / atomicAdd_system
//
// Fetch-and-add returning the previous value.  Plain overloads act at
// agent (device) scope, *_system overloads at system scope; all relaxed
// memory order.  For the device-scope float/double overloads,
// __AMDGCN_UNSAFE_FP_ATOMICS__ opts into unsafeAtomicAdd (declared in
// amd_hip_unsafe_atomics.h) — a faster hardware FP path with weaker
// guarantees, hence "unsafe".
// ---------------------------------------------------------------------------
__device__
inline
int atomicAdd(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicAdd_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicAdd(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
float atomicAdd_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

// Deprecated fire-and-forget float add (no return value), implemented via
// the OCKL device library.  The DEPRECATED annotation is unavailable under
// hipRTC, hence the guard.
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}

__device__
inline
double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
double atomicAdd_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
303
// ---------------------------------------------------------------------------
// atomicSub / atomicSub_system
//
// Fetch-and-subtract returning the previous value, implemented as a
// fetch-add of the negated operand.  For the unsigned overloads, `-val`
// deliberately relies on well-defined modular (wraparound) arithmetic.
// Plain overloads: agent scope; *_system: system scope; relaxed order.
// The float/double device-scope variants use the unsafe FP add path when
// __AMDGCN_UNSAFE_FP_ATOMICS__ is enabled (see atomicAdd).
// ---------------------------------------------------------------------------
__device__
inline
int atomicSub(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicSub_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicSub(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicSub(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicSub_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicSub(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
float atomicSub_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
double atomicSub(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
double atomicSub_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
383
// ---------------------------------------------------------------------------
// atomicExch / atomicExch_system
//
// Atomic exchange: stores val into *address and returns the previous
// value.  Plain overloads act at agent (device) scope, *_system at
// system scope; relaxed memory ordering throughout.
// ---------------------------------------------------------------------------
__device__
inline
int atomicExch(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicExch_system(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicExch(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicExch(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
float atomicExch_system(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
double atomicExch(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
double atomicExch_system(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
455
456__device__
457inline
458int atomicMin(int* address, int val) {
459#if defined(__gfx941__)
460 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
461 address, val, [](int x, int y) { return x < y; }, [=]() {
462 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
463 __HIP_MEMORY_SCOPE_AGENT);
464 });
465#else
466 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
467#endif // __gfx941__
468}
469
470__device__
471inline
472int atomicMin_system(int* address, int val) {
473#if defined(__gfx941__)
474 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
475 address, val, [](int x, int y) { return x < y; }, [=]() {
476 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
477 __HIP_MEMORY_SCOPE_SYSTEM);
478 });
479#else
480 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
481#endif // __gfx941__
482}
483
484__device__
485inline
486unsigned int atomicMin(unsigned int* address, unsigned int val) {
487#if defined(__gfx941__)
488 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
489 address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
490 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
491 __HIP_MEMORY_SCOPE_AGENT);
492 });
493#else
494 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
495#endif // __gfx941__
496
497}
498
499__device__
500inline
501unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
502#if defined(__gfx941__)
503 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
504 address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
505 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
506 __HIP_MEMORY_SCOPE_SYSTEM);
507 });
508#else
509 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
510#endif // __gfx941__
511}
512
513__device__
514inline
515unsigned long long atomicMin(unsigned long* address, unsigned long val) {
516#if defined(__gfx941__)
517 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
518 address,
519 val,
520 [](unsigned long x, unsigned long y) { return x < y; },
521 [=]() {
522 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
523 __HIP_MEMORY_SCOPE_AGENT);
524 });
525#else
526 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
527#endif // __gfx941__
528}
529
530__device__
531inline
532unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
533#if defined(__gfx941__)
534 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
535 address,
536 val,
537 [](unsigned long x, unsigned long y) { return x < y; },
538 [=]() {
539 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
540 __HIP_MEMORY_SCOPE_SYSTEM);
541 });
542#else
543 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
544#endif // __gfx941__
545}
546
547__device__
548inline
549unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
550#if defined(__gfx941__)
551 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
552 address,
553 val,
554 [](unsigned long long x, unsigned long long y) { return x < y; },
555 [=]() {
556 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
557 __HIP_MEMORY_SCOPE_AGENT);
558 });
559#else
560 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
561#endif // __gfx941__
562}
563
564__device__
565inline
566unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
567#if defined(__gfx941__)
568 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
569 address,
570 val,
571 [](unsigned long long x, unsigned long long y) { return x < y; },
572 [=]() {
573 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
574 __HIP_MEMORY_SCOPE_SYSTEM);
575 });
576#else
577 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
578#endif // __gfx941__
579}
580
581__device__
582inline
583long long atomicMin(long long* address, long long val) {
584#if defined(__gfx941__)
585 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
586 address, val, [](long long x, long long y) { return x < y; },
587 [=]() {
588 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
589 });
590#else
591 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
592#endif // __gfx941__
593}
594
595__device__
596inline
597long long atomicMin_system(long long* address, long long val) {
598#if defined(__gfx941__)
599 return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
600 address, val, [](long long x, long long y) { return x < y; },
601 [=]() {
602 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
603 });
604#else
605 return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
606#endif // __gfx941__
607}
608
609__device__
610inline
611float atomicMin(float* addr, float val) {
612#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613 return unsafeAtomicMin(addr, val);
614#else
615 typedef union u_hold {
616 float a;
617 unsigned int b;
618 } u_hold_t;
619 u_hold_t u{val};
620 bool neg_zero = 0x80000000U == u.b;
621 #if __has_builtin(__hip_atomic_load) && \
622 __has_builtin(__hip_atomic_compare_exchange_strong)
623 float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
624 bool done = false;
625 while (!done && (value > val || (neg_zero && value == 0.0f))) {
626 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
627 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
628 }
629 return value;
630 #else
631 unsigned int *uaddr = (unsigned int *)addr;
632 unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
633 bool done = false;
634 while (!done && (__uint_as_float(value) > val || (neg_zero && __uint_as_float(value) == 0.0f))) {
635 done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
636 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
637 }
638 return __uint_as_float(value);
639 #endif
640#endif
641}
642
643__device__
644inline
645float atomicMin_system(float* address, float val) {
646 unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
647 #if __has_builtin(__hip_atomic_load)
648 unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
649 #else
650 unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
651 #endif
652 float value = __uint_as_float(tmp);
653
654 while (val < value) {
655 value = atomicCAS_system(address, value, val);
656 }
657
658 return value;
659}
660
661__device__
662inline
663double atomicMin(double* addr, double val) {
664#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
665 return unsafeAtomicMin(addr, val);
666#else
667 typedef union u_hold {
668 double a;
669 unsigned long long b;
670 } u_hold_t;
671 u_hold_t u{val};
672 bool neg_zero = 0x8000000000000000ULL == u.b;
673 #if __has_builtin(__hip_atomic_load) && \
674 __has_builtin(__hip_atomic_compare_exchange_strong)
675 double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
676 bool done = false;
677 while (!done && (value > val || (neg_zero && value == 0.0))) {
678 done = __hip_atomic_compare_exchange_strong(addr, &value, val,
679 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
680 }
681 return value;
682 #else
683 unsigned long long *uaddr = (unsigned long long *)addr;
684 unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
685 bool done = false;
686 while (!done &&
687 (__longlong_as_double(value) > val || (neg_zero && __longlong_as_double(value) == 0.0))) {
688 done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
689 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
690 }
691 return __longlong_as_double(value);
692 #endif
693#endif
694}
695
696__device__
697inline
698double atomicMin_system(double* address, double val) {
699 unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
700 #if __has_builtin(__hip_atomic_load)
701 unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
702 #else
703 unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
704 #endif
705 double value = __longlong_as_double(tmp);
706
707 while (val < value) {
708 value = atomicCAS_system(address, value, val);
709 }
710
711 return value;
712}
713
// ---------------------------------------------------------------------------
// atomicMax / atomicMax_system
//
// Fetch-max returning the previous value.  Plain overloads act at agent
// (device) scope, *_system overloads at system scope; relaxed memory
// ordering.  On gfx941 the integer forms are routed through
// hip_cas_extrema_expander (gfx941-specific workaround), with the native
// __hip_atomic_fetch_max kept as the fallback taken for LDS addresses.
// ---------------------------------------------------------------------------
__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int x, int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int x, int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address,
    val,
    [](unsigned long x, unsigned long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address,
    val,
    [](unsigned long x, unsigned long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address,
    val,
    [](unsigned long long x, unsigned long long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address,
    val,
    [](unsigned long long x, unsigned long long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
long long atomicMax(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](long long x, long long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
long long atomicMax_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](long long x, long long y) { return y < x; },
    [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Float max via CAS loop, with a bitwise check for a -0.0f argument
// (IEEE `<` cannot distinguish signed zeros).
// NOTE(review): the special case mirrors atomicMin's and stores -0.0f
// over a stored +0.0f, which for a *max* looks inverted — confirm
// whether this is the intended behavior before changing it.
__device__
inline
float atomicMax(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  typedef union u_hold {
    float a;
    unsigned int b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x80000000U == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value < val || (neg_zero && value == 0.0f))) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && (__uint_as_float(value) < val || (neg_zero && __uint_as_float(value) == 0.0f))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}

// NOTE(review): no signed-zero special case here, unlike the agent-scope
// overload above — confirm intended.
__device__
inline
float atomicMax_system(float* address, float val) {
  unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  // atomicCAS_system returns the observed value, which both refreshes
  // `value` after a lost race and terminates the loop after success.
  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

// Double max via CAS loop; same signed-zero caveats as the float overload.
__device__
inline
double atomicMax(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  typedef union u_hold {
    double a;
    unsigned long long b;
  } u_hold_t;
  u_hold_t u{val};
  bool neg_zero = 0x8000000000000000ULL == u.b;
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && (value < val || (neg_zero && value == 0.0))) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done &&
        (__longlong_as_double(value) < val || (neg_zero && __longlong_as_double(value) == 0.0))) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}

__device__
inline
double atomicMax_system(double* address, double val) {
  unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
970
// Atomic increment-with-wraparound (CUDA atomicInc contract) at agent scope:
//   *address = (*address >= val) ? 0 : (*address + 1)
// Returns the old value.
//
// On gfx941 the operation routes through hip_cas_expander (declared alongside
// the unsafe atomics), which pairs a CAS-loop functor with the direct builtin
// call — presumably a hardware workaround for that target; other targets use
// the native inc builtin directly.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      // CAS-loop equivalent of the wrapping increment.
      [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
      [=]() {
        return
            __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__

}
989
// Atomic decrement-with-wraparound (CUDA atomicDec contract) at agent scope:
//   *address = (*address == 0 || *address > val) ? val : (*address - 1)
// Returns the old value.
//
// gfx941 uses the hip_cas_expander workaround path (see atomicInc above);
// other targets call the native dec builtin directly.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      // CAS-loop equivalent of the wrapping decrement.
      [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
      [=]() {
        return
            __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__

}
1008
// atomicAnd overload family: relaxed atomic fetch-AND, returning the old
// value.  Unsuffixed overloads act at agent (device) scope; *_system
// overloads at system scope (visible to the host and other agents).
// On gfx941 each overload routes through hip_cas_expander, pairing a
// CAS-loop functor with the direct __hip_atomic_fetch_and call (target
// workaround); other targets call the builtin directly.
__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic AND for int.
__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic AND for unsigned int.
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic AND for unsigned int.
__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic AND for unsigned long (64-bit on the HIP targets).
__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic AND for unsigned long.
__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic AND for unsigned long long.
__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x &= y; },
      [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic AND for unsigned long long.
__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x &= y; },
      [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1126
// atomicOr overload family: relaxed atomic fetch-OR, returning the old
// value.  Scope and gfx941 hip_cas_expander workaround structure match the
// atomicAnd family above.
__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic OR for int.
__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic OR for unsigned int.
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic OR for unsigned int.
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic OR for unsigned long.
__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic OR for unsigned long.
__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic OR for unsigned long long.
__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x |= y; },
      [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic OR for unsigned long long.
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x |= y; },
      [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1244
// atomicXor overload family: relaxed atomic fetch-XOR, returning the old
// value.  Scope and gfx941 hip_cas_expander workaround structure match the
// atomicAnd/atomicOr families above.
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic XOR for int.
__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic XOR for unsigned int.
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic XOR for unsigned int.
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic XOR for unsigned long.
__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope atomic XOR for unsigned long.
__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Agent-scope atomic XOR for unsigned long long.
__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x ^= y; },
      [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1345
1346__device__
1347inline
1348unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
1349 return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1350}
1351
1352#else // __hip_atomic_compare_exchange_strong
1353
// Fallback atomicCAS overloads (used when the __hip_atomic_* builtins are
// unavailable): relaxed compare-exchange via the generic __atomic builtins.
// __atomic_compare_exchange_n writes the observed memory contents into
// `compare` on failure and leaves it untouched (== old value) on success, so
// returning `compare` yields the pre-operation value, matching the CUDA
// atomicCAS contract.
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// unsigned int variant; same contract as above.
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// unsigned long long variant; same contract as above.
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
1385
// Fallback atomicAdd overloads: relaxed fetch-add, returning the old value.
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant.
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// float variant.  With __AMDGCN_UNSAFE_FP_ATOMICS__ the hardware FP-add
// atomic is used (may flush denormals / round differently); otherwise the
// clang floating-point __atomic_fetch_add extension is used.
__device__
inline
float atomicAdd(float* address, float val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
1415
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
// Deprecated no-return float atomic add: forwards to the OCKL device-library
// no-return add, which may use a faster hardware path since the old value is
// discarded.  Kept only for backward compatibility.
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
1425
// Fallback double atomicAdd: hardware FP-add atomic when unsafe FP atomics
// are enabled, otherwise the clang floating-point __atomic_fetch_add
// extension.  Returns the old value.
__device__
inline
double atomicAdd(double* address, double val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
1436
// Fallback atomicSub overloads: relaxed fetch-sub, returning the old value.
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
1449
// Fallback atomicExch overloads: relaxed atomic exchange, returning the old
// value.
__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant.
__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// float variant: exchanges the 32-bit integer image of the float and
// reinterprets the old bits back to float.
__device__
inline
float atomicExch(float* address, float val)
{
    return __uint_as_float(__atomic_exchange_n(
        reinterpret_cast<unsigned int*>(address),
        __float_as_uint(val),
        __ATOMIC_RELAXED));
}
1477
// Fallback atomicMin overloads, returning the old value.  32-bit variants
// use the native fetch-min builtin; 64-bit variants emulate with a CAS loop.
__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant: CAS loop.  The fresh re-load before each CAS
// skips an exchange attempt when memory has already moved on; the loop ends
// once memory holds a value <= val.
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (val < tmp) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// long long variant: same CAS-loop scheme with signed comparison.
__device__ inline long long atomicMin(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (val < tmp) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) {
      tmp = tmp1;
      continue;
    }

    tmp = atomicCAS(address, tmp, val);
  }
  return tmp;
}
1520
// Fallback atomicMax overloads, returning the old value.  32-bit variants
// use the native fetch-max builtin; 64-bit variants emulate with a CAS loop
// (mirror image of the atomicMin loops above).
__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant: CAS loop with a re-load to avoid redundant
// exchange attempts; ends once memory holds a value >= val.
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
    while (tmp < val) {
        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

        if (tmp1 != tmp) { tmp = tmp1; continue; }

        tmp = atomicCAS(address, tmp, val);
    }

    return tmp;
}
// long long variant: same CAS-loop scheme with signed comparison.
__device__ inline long long atomicMax(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (tmp < val) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) {
      tmp = tmp1;
      continue;
    }

    tmp = atomicCAS(address, tmp, val);
  }
  return tmp;
}
1563
// Fallback atomicInc: wrapping increment (old >= val ? 0 : old + 1) via the
// AMDGCN inc builtin at agent scope; returns the old value.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}

// Fallback atomicDec: wrapping decrement (old == 0 || old > val ? val :
// old - 1) via the AMDGCN dec builtin at agent scope; returns the old value.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}
1577
// Fallback atomicAnd overloads: relaxed fetch-AND, returning the old value.
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant.
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
1597
// Fallback atomicOr overloads: relaxed fetch-OR, returning the old value.
__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant.
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
1617
// Fallback atomicXor overloads: relaxed fetch-XOR, returning the old value.
__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// unsigned int variant.
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// unsigned long long variant.
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
1637
1638#endif // __hip_atomic_compare_exchange_strong