runtime_atomics.h
#ifndef HALIDE_RUNTIME_RUNTIME_ATOMICS_H
#define HALIDE_RUNTIME_RUNTIME_ATOMICS_H

// This file provides an abstraction layer over the __sync/__atomic builtins
// in Clang; for various reasons, we use __sync for 32-bit targets, and
// __atomic for 64-bit. At some point it may be desirable/necessary to
// migrate 32-bit to __atomic as well, at which time this file can
// likely go away. See https://github.com/halide/Halide/issues/7431 for
// a discussion of the history and issues as to why we work this way.
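//
// Illustrative only (not part of the original header): a typical call through
// these wrappers looks like
//
//     uintptr_t word = 0;
//     uintptr_t expected = 0;
//     uintptr_t desired = 1;
//     if (atomic_cas_strong_sequentially_consistent(&word, &expected, &desired)) {
//         // Success: word now holds desired. On failure, expected is
//         // updated to the value that was observed in word.
//     }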

#include "HalideRuntime.h"

namespace Halide {
namespace Runtime {
namespace Internal {
namespace Synchronization {

namespace {

// TODO: most of these wrappers should do the remove_volatile for secondary arguments;
// I've only put it in place for the locations necessary at this time.
template<class T>
struct remove_volatile {
    typedef T type;
};
template<class T>
struct remove_volatile<volatile T> {
    typedef T type;
};
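// For example, remove_volatile<volatile int>::type and
// remove_volatile<int>::type are both int.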

#ifdef BITS_32
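// Note: the __sync builtins used below are documented (in GCC/Clang) to imply
// a full memory barrier, with the exception of __sync_lock_test_and_set,
// which is only an acquire barrier. The relaxed/acquire/release variants in
// this branch are therefore generally stronger than their names suggest.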
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_sub(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_or(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_add_fetch_sequentially_consistent(T *addr, TV val) {
    return __sync_add_and_fetch(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_sub_fetch_sequentially_consistent(T *addr, TV val) {
    return __sync_sub_and_fetch(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) {
    TV oldval = *expected;
    TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}
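
// All of the CAS variants below (strong or weak, whatever the requested
// ordering) funnel through the sequentially consistent helper above, since
// __sync_val_compare_and_swap offers no weaker orderings.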
ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    *val = *addr;
}

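// __sync has no dedicated atomic-load builtin, so the acquire load below is
// expressed as a full barrier followed by a plain load.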
template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __sync_synchronize();
    *val = *addr;
}

template<typename T>
ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
    // Despite the name, this is really just an exchange operation with acquire ordering.
    return __sync_lock_test_and_set(addr, val);
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE void atomic_store_relaxed(T *addr, T *val) {
    *addr = *val;
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    *addr = *val;
    __sync_synchronize();
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
    *addr = *val;
    __sync_synchronize();
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __sync_synchronize();
}

ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
    __sync_synchronize();
}

#else
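// 64-bit targets use the __atomic builtins, which take an explicit
// memory-order argument.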

ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_add_fetch_sequentially_consistent(T *addr, TV val) {
    return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_sub_fetch_sequentially_consistent(T *addr, TV val) {
    return __atomic_sub_fetch(addr, val, __ATOMIC_SEQ_CST);
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE TV atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_ACQUIRE);
}

template<typename T>
ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
    T result;
    __atomic_exchange(addr, &val, &result, __ATOMIC_ACQUIRE);
    return result;
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_store_relaxed(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELEASE);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
    __atomic_store(addr, val, __ATOMIC_SEQ_CST);
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}

ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
}

#endif  // BITS_32

}  // namespace

}  // namespace Synchronization
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

#endif  // HALIDE_RUNTIME_RUNTIME_ATOMICS_H
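
A minimal usage sketch (not part of the header): a toy spinlock built on the
wrappers above. The names spin_acquire, spin_release, and lock_word are
hypothetical, chosen here for illustration only.

    #include "runtime_atomics.h"

    using namespace Halide::Runtime::Internal::Synchronization;

    // Spin until we atomically flip lock_word from 0 to 1, with acquire
    // ordering on the successful transition.
    void spin_acquire(uintptr_t *lock_word) {
        uintptr_t expected = 0;
        uintptr_t desired = 1;
        while (!atomic_cas_weak_acquire_relaxed(lock_word, &expected, &desired)) {
            expected = 0;  // the failed CAS wrote the observed value here
        }
    }

    // Publish the unlock with release ordering.
    void spin_release(uintptr_t *lock_word) {
        uintptr_t zero = 0;
        atomic_store_release(lock_word, &zero);
    }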