Halide  20.0.0
HalideBuffer.h
1 /** \file
2  * Defines a Buffer type that wraps halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
9 #include <algorithm>
10 #include <atomic>
11 #include <cassert>
12 #include <cstdint>
13 #include <cstdlib>
14 #include <cstring>
15 #include <limits>
16 #include <memory>
17 #include <type_traits>
18 #include <vector>
19 
20 #ifdef __APPLE__
21 #include <AvailabilityVersions.h>
22 #include <TargetConditionals.h>
23 #endif
24 
25 #if defined(__has_feature)
26 #if __has_feature(memory_sanitizer)
27 #include <sanitizer/msan_interface.h>
28 #endif
29 #endif
30 
31 #include "HalideRuntime.h"
32 
33 #ifdef _MSC_VER
34 #include <malloc.h>
35 #define HALIDE_ALLOCA _alloca
36 #else
37 #define HALIDE_ALLOCA __builtin_alloca
38 #endif
39 
40 // gcc 5.1 has a false positive warning on this code
41 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
42 #pragma GCC diagnostic ignored "-Warray-bounds"
43 #endif
44 
45 #ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
46 #define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
47 #endif
48 
49 #ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
50 // Conservatively align buffer allocations to 128 bytes by default.
51 // This is enough alignment for all the platforms currently in use.
52 // Redefine this in your compiler settings if you desire more/less alignment.
53 #define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
54 #endif
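// For example, a build that wants 64-byte alignment instead can define
// HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT=64 in its compiler flags;
// any power of two satisfies the static_assert that follows.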
55 
56 static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT - 1)) == 0),
57               "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
58 
59 // Unfortunately, not all C++17 runtimes support aligned_alloc
60 // (it may depend on OS/SDK version); this is provided as an opt-out
61 // if you are compiling on a platform that doesn't provide a (good)
62 // implementation. (Note that we actually use the C11 `::aligned_alloc()`
63 // rather than the C++17 `std::aligned_alloc()` because at least one platform
64 // we found supports the former but not the latter.)
65 #ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
66 
67 // clang-format off
68 #ifdef _MSC_VER
69 
70  // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
71  // has stated they probably never will, so always default it off here.
72  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
73 
74 #elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
75 
76  // Android doesn't provide aligned_alloc until API 28
77  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
78 
79 #elif defined(__APPLE__)
80 
81  #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
82 
83  // macOS doesn't provide aligned_alloc until 10.15
84  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
85 
86  #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
87 
88  // iOS doesn't provide aligned_alloc until 14.0
89  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
90 
91  #else
92 
93  // Assume it's ok on all other Apple targets
94  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
95 
96  #endif
97 
98 #else
99 
100  #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
101 
102  // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
103  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
104 
105  #else
106 
107  // Not Windows, Android, or Apple: just assume it's ok
108  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
109 
110  #endif
111 
112 #endif
113 // clang-format on
114 
115 #endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
116 
117 namespace Halide {
118 namespace Runtime {
119 
120 // Forward-declare our Buffer class
121 template<typename T, int Dims, int InClassDimStorage>
122 class Buffer;
123 
124 // A helper to check if a parameter pack is entirely implicitly
125 // int-convertible to use with std::enable_if
126 template<typename... Args>
127 struct AllInts : std::false_type {};
128 
129 template<>
130 struct AllInts<> : std::true_type {};
131 
132 template<typename T, typename... Args>
133 struct AllInts<T, Args...> {
134  static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
135 };
136 
137 // Floats and doubles are technically implicitly int-convertible, but
138 // doing so produces a warning we treat as an error, so just disallow
139 // it here.
140 template<typename... Args>
141 struct AllInts<float, Args...> : std::false_type {};
142 
143 template<typename... Args>
144 struct AllInts<double, Args...> : std::false_type {};
145 
146 namespace Internal {
147 // A helper to detect if there are any zeros in a container
148 template<typename Container>
149 bool any_zero(const Container &c) {
150  for (int i : c) {
151  if (i == 0) {
152  return true;
153  }
154  }
155  return false;
156 }
157 
158 struct DefaultAllocatorFns {
159  static inline void *(*default_allocate_fn)(size_t) = nullptr;
160  static inline void (*default_deallocate_fn)(void *) = nullptr;
161 };
162 } // namespace Internal
163 
164 /** A struct acting as a header for allocations owned by the Buffer
165  * class itself. */
166 struct AllocationHeader {
167  void (*deallocate_fn)(void *);
168  std::atomic<int> ref_count;
169 
170  // Note that ref_count always starts at 1
171  explicit AllocationHeader(void (*deallocate_fn)(void *))
172      : deallocate_fn(deallocate_fn), ref_count(1) {
173  }
174 };
175 
176 /** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
177 enum struct BufferDeviceOwnership : int {
178  Allocated, ///< halide_device_free will be called when device ref count goes to zero
179  WrappedNative, ///< halide_device_detach_native will be called when device ref count goes to zero
180  Unmanaged, ///< No free routine will be called when device ref count goes to zero
181  AllocatedDeviceAndHost, ///< Call device_and_host_free when DevRefCount goes to zero.
182  Cropped, ///< Call halide_device_release_crop when DevRefCount goes to zero.
183 };
184 
185 /** A similar struct for managing device allocations. */
186 struct DeviceRefCount {
187  // This is only ever constructed when there's something to manage,
188  // so start at one.
189  std::atomic<int> count{1};
190  BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
191 };
192 
193 constexpr int AnyDims = -1;
194 
195 /** A templated Buffer class that wraps halide_buffer_t and adds
196  * functionality. When using Halide from C++, this is the preferred
197  * way to create input and output buffers. The overhead of using this
198  * class relative to a naked halide_buffer_t is minimal - it uses another
199  * ~16 bytes on the stack, and does no dynamic allocations when using
200  * it to represent existing memory of a known maximum dimensionality.
201  *
202  * The template parameter T is the element type. For buffers where the
203  * element type is unknown, or may vary, use void or const void.
204  *
205  * The template parameter Dims is the number of dimensions. For buffers where
206  * the dimensionality is not known at compile time, or may vary, use AnyDims.
207  *
208  * InClassDimStorage is the maximum number of dimensions that can be represented
209  * using space inside the class itself. Set it to the maximum dimensionality
210  * you expect this buffer to be. If the actual dimensionality exceeds
211  * this, heap storage is allocated to track the shape of the buffer.
212  * InClassDimStorage defaults to 4, which should cover nearly all usage.
213  *
214  * The class optionally allocates and owns memory for the image using
215  * a shared pointer allocated with the provided allocator. If they are
216  * null, malloc and free are used. Any device-side allocation is
217  * considered as owned if and only if the host-side allocation is
218  * owned. */
219 template<typename T = void,
220  int Dims = AnyDims,
221  int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
222 class Buffer {
223  /** The underlying halide_buffer_t */
224  halide_buffer_t buf = {};
225 
226  /** Some in-class storage for shape of the dimensions. */
227  halide_dimension_t shape[InClassDimStorage];
228 
229  /** The allocation owned by this Buffer. NULL if the Buffer does not
230  * own the memory. */
231  AllocationHeader *alloc = nullptr;
232 
233  /** A reference count for the device allocation owned by this
234  * buffer. */
235  mutable DeviceRefCount *dev_ref_count = nullptr;
236 
237  /** True if T is of type void or const void */
238  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
239 
240  /** A type function that adds a const qualifier if T is a const type. */
241  template<typename T2>
242  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
243 
244  /** T unless T is (const) void, in which case (const)
245  * uint8_t. Useful for providing return types for operator() */
246  using not_void_T = typename std::conditional<T_is_void,
247  add_const_if_T_is_const<uint8_t>,
248  T>::type;
249 
250  /** T with constness removed. Useful for return type of copy(). */
251  using not_const_T = typename std::remove_const<T>::type;
252 
253  /** The type the elements are stored as. Equal to not_void_T
254  * unless T is a pointer, in which case uint64_t. Halide stores
255  * all pointer types as uint64s internally, even on 32-bit
256  * systems. */
257  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
258 
259 public:
260  /** True if the Halide type is not void (or const void). */
261  static constexpr bool has_static_halide_type = !T_is_void;
262 
263  /** Get the Halide type of T. Callers should not use the result if
264  * has_static_halide_type is false. */
265  static constexpr halide_type_t static_halide_type() {
266  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
267  }
268 
269  /** Does this Buffer own the host memory it refers to? */
270  bool owns_host_memory() const {
271  return alloc != nullptr;
272  }
273 
274  static constexpr bool has_static_dimensions = (Dims != AnyDims);
275 
276  /** Callers should not use the result if
277  * has_static_dimensions is false. */
278  static constexpr int static_dimensions() {
279  return Dims;
280  }
281 
282  static_assert(!has_static_dimensions || static_dimensions() >= 0);
283 
284 private:
285  /** Increment the reference count of any owned allocation */
286  void incref() const {
287  if (owns_host_memory()) {
288  alloc->ref_count++;
289  }
290  if (buf.device) {
291  if (!dev_ref_count) {
292  // I seem to have a non-zero dev field but no
293  // reference count for it. I must have been given a
294  // device allocation by a Halide pipeline, and have
295  // never been copied from since. Take sole ownership
296  // of it.
297  dev_ref_count = new DeviceRefCount;
298  }
299  dev_ref_count->count++;
300  }
301  }
302 
303  // Note that this is called "cropped" but can also encompass a slice/embed
304  // operation as well.
305  struct DevRefCountCropped : DeviceRefCount {
306  // We will only store Buffers that have a dynamic number of dimensions.
307  // Buffers that are cropped or sliced from need to first be converted to
308  // one with variable size. This is required because we cannot possibly
309  // know what the actual dimensionality is of the buffer this is a
310  // crop or slice from. Since cropping a sliced buffer is also possible,
311  // no optimizations can be made for cropped buffers either.
312  Buffer<T, AnyDims> cropped_from;
313  explicit DevRefCountCropped(const Buffer<T, AnyDims> &cropped_from)
314  : cropped_from(cropped_from) {
315  ownership = BufferDeviceOwnership::Cropped;
316  }
317  };
318 
319  /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
320  void crop_from(const Buffer<T, AnyDims> &cropped_from) {
321  assert(dev_ref_count == nullptr);
322  dev_ref_count = new DevRefCountCropped(cropped_from);
323  }
324 
325  /** Decrement the reference count of any owned allocation and free host
326  * and device memory if it hits zero. Sets alloc to nullptr. */
327  void decref(bool device_only = false) {
328  if (owns_host_memory() && !device_only) {
329  int new_count = --(alloc->ref_count);
330  if (new_count == 0) {
331  void (*fn)(void *) = alloc->deallocate_fn;
332  alloc->~AllocationHeader();
333  fn(alloc);
334  }
335  buf.host = nullptr;
336  alloc = nullptr;
337  set_host_dirty(false);
338  }
339  int new_count = 0;
340  if (dev_ref_count) {
341  new_count = --(dev_ref_count->count);
342  }
343  if (new_count == 0) {
344  if (buf.device) {
345  assert(!(alloc && device_dirty()) &&
346  "Implicitly freeing a dirty device allocation while a host allocation still lives. "
347  "Call device_free explicitly if you want to drop dirty device-side data. "
348  "Call copy_to_host explicitly if you want the data copied to the host allocation "
349  "before the device allocation is freed.");
350  int result = halide_error_code_success;
351  if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
352  result = buf.device_interface->detach_native(nullptr, &buf);
353  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
354  result = buf.device_interface->device_and_host_free(nullptr, &buf);
355  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
356  result = buf.device_interface->device_release_crop(nullptr, &buf);
357  } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
358  result = buf.device_interface->device_free(nullptr, &buf);
359  }
360  // No reasonable way to return the error, but we can at least assert-fail in debug builds.
361  assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
362  (void)result;
363  }
364  if (dev_ref_count) {
365  if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
366  delete (DevRefCountCropped *)dev_ref_count;
367  } else {
368  delete dev_ref_count;
369  }
370  }
371  }
372  dev_ref_count = nullptr;
373  buf.device = 0;
374  buf.device_interface = nullptr;
375  }
376 
377  void free_shape_storage() {
378  if (buf.dim != shape) {
379  delete[] buf.dim;
380  buf.dim = nullptr;
381  }
382  }
383 
384  template<int DimsSpecified>
385  void make_static_shape_storage() {
386  static_assert(Dims == AnyDims || Dims == DimsSpecified,
387  "Number of arguments to Buffer() does not match static dimensionality");
388  buf.dimensions = DimsSpecified;
389  if constexpr (Dims == AnyDims) {
390  if constexpr (DimsSpecified <= InClassDimStorage) {
391  buf.dim = shape;
392  } else {
393  static_assert(DimsSpecified >= 1);
394  buf.dim = new halide_dimension_t[DimsSpecified];
395  }
396  } else {
397  static_assert(InClassDimStorage >= Dims);
398  buf.dim = shape;
399  }
400  }
401 
402  void make_shape_storage(const int dimensions) {
403  if (Dims != AnyDims && Dims != dimensions) {
404  assert(false && "Number of arguments to Buffer() does not match static dimensionality");
405  }
406  // This should usually be inlined, so if dimensions is statically known,
407  // we can skip the call to new
408  buf.dimensions = dimensions;
409  buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
410  }
411 
412  void copy_shape_from(const halide_buffer_t &other) {
413  // All callers of this ensure that buf.dimensions == other.dimensions.
414  make_shape_storage(other.dimensions);
415  std::copy(other.dim, other.dim + other.dimensions, buf.dim);
416  }
417 
418  template<typename T2, int D2, int S2>
419  void move_shape_from(Buffer<T2, D2, S2> &&other) {
420  if (other.shape == other.buf.dim) {
421  copy_shape_from(other.buf);
422  } else {
423  buf.dim = other.buf.dim;
424  other.buf.dim = nullptr;
425  }
426  other.buf = halide_buffer_t();
427  }
428 
429  /** Initialize the shape from a halide_buffer_t. */
430  void initialize_from_buffer(const halide_buffer_t &b,
431  BufferDeviceOwnership ownership) {
432  memcpy(&buf, &b, sizeof(halide_buffer_t));
433  copy_shape_from(b);
434  if (b.device) {
435  dev_ref_count = new DeviceRefCount;
436  dev_ref_count->ownership = ownership;
437  }
438  }
439 
440  /** Initialize the shape from an array of ints */
441  void initialize_shape(const int *sizes) {
442  for (int i = 0; i < buf.dimensions; i++) {
443  buf.dim[i].min = 0;
444  buf.dim[i].extent = sizes[i];
445  if (i == 0) {
446  buf.dim[i].stride = 1;
447  } else {
448  buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
449  }
450  }
451  }
452 
453  /** Initialize the shape from a vector of extents */
454  void initialize_shape(const std::vector<int> &sizes) {
455  assert(buf.dimensions == (int)sizes.size());
456  initialize_shape(sizes.data());
457  }
458 
459  /** Initialize the shape from the static shape of an array */
460  template<typename Array, size_t N>
461  void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
462  buf.dim[next].min = 0;
463  buf.dim[next].extent = (int)N;
464  if (next == 0) {
465  buf.dim[next].stride = 1;
466  } else {
467  initialize_shape_from_array_shape(next - 1, vals[0]);
468  buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
469  }
470  }
471 
472  /** Base case for the template recursion above. */
473  template<typename T2>
474  void initialize_shape_from_array_shape(int, const T2 &) {
475  }
476 
477  /** Get the dimensionality of a multi-dimensional C array */
478  template<typename Array, size_t N>
479  static int dimensionality_of_array(Array (&vals)[N]) {
480  return dimensionality_of_array(vals[0]) + 1;
481  }
482 
483  template<typename T2>
484  static int dimensionality_of_array(const T2 &) {
485  return 0;
486  }
487 
488  /** Get the underlying halide_type_t of an array's element type. */
489  template<typename Array, size_t N>
490  static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
491  return scalar_type_of_array(vals[0]);
492  }
493 
494  template<typename T2>
495  static halide_type_t scalar_type_of_array(const T2 &) {
496  return halide_type_of<typename std::remove_cv<T2>::type>();
497  }
498 
499  /** Crop a single dimension without handling device allocation. */
500  void crop_host(int d, int min, int extent) {
501  assert(dim(d).min() <= min);
502  assert(dim(d).max() >= min + extent - 1);
503  ptrdiff_t shift = min - dim(d).min();
504  if (buf.host != nullptr) {
505  buf.host += (shift * dim(d).stride()) * type().bytes();
506  }
507  buf.dim[d].min = min;
508  buf.dim[d].extent = extent;
509  }
510 
511  /** Crop as many dimensions as are in rect, without handling device allocation. */
512  void crop_host(const std::vector<std::pair<int, int>> &rect) {
513  assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
514  int limit = (int)rect.size();
515  assert(limit <= dimensions());
516  for (int i = 0; i < limit; i++) {
517  crop_host(i, rect[i].first, rect[i].second);
518  }
519  }
520 
521  void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
522  assert(buf.device_interface != nullptr);
523  if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
524  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
525  // is it possible to get to this point without incref having run at least once since
526  // the device field was set? (I.e. in the internal logic of crop. incref might have been
527  // called.)
528  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
529  result_host_cropped.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
530  } else {
531  result_host_cropped.crop_from(*this);
532  }
533  }
534  }
535 
536  /** slice a single dimension without handling device allocation. */
537  void slice_host(int d, int pos) {
538  static_assert(Dims == AnyDims);
539  assert(dimensions() > 0);
540  assert(d >= 0 && d < dimensions());
541  assert(pos >= dim(d).min() && pos <= dim(d).max());
542  buf.dimensions--;
543  ptrdiff_t shift = pos - buf.dim[d].min;
544  if (buf.host != nullptr) {
545  buf.host += (shift * buf.dim[d].stride) * type().bytes();
546  }
547  for (int i = d; i < buf.dimensions; i++) {
548  buf.dim[i] = buf.dim[i + 1];
549  }
550  buf.dim[buf.dimensions] = {0, 0, 0};
551  }
552 
553  void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
554  assert(buf.device_interface != nullptr);
555  if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
556  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
557  // is it possible to get to this point without incref having run at least once since
558  // the device field was set? (I.e. in the internal logic of slice. incref might have been
559  // called.)
560  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
561  // crop_from() is correct here, despite the fact that we are slicing.
562  result_host_sliced.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
563  } else {
564  // crop_from() is correct here, despite the fact that we are slicing.
565  result_host_sliced.crop_from(*this);
566  }
567  }
568  }
569 
570 public:
571  typedef T ElemType;
572 
573  /** Read-only access to the shape */
574  class Dimension {
575  const halide_dimension_t &d;
576 
577  public:
578  /** The lowest coordinate in this dimension */
579  HALIDE_ALWAYS_INLINE int min() const {
580  return d.min;
581  }
582 
583  /** The number of elements in memory you have to step over to
584  * increment this coordinate by one. */
585  HALIDE_ALWAYS_INLINE int stride() const {
586  return d.stride;
587  }
588 
589  /** The extent of the image along this dimension */
590  HALIDE_ALWAYS_INLINE int extent() const {
591  return d.extent;
592  }
593 
594  /** The highest coordinate in this dimension */
595  HALIDE_ALWAYS_INLINE int max() const {
596  return min() + extent() - 1;
597  }
598 
599  /** An iterator class, so that you can iterate over
600  * coordinates in a dimension using a range-based for loop. */
601  struct iterator {
602  int val;
603  int operator*() const {
604  return val;
605  }
606  bool operator!=(const iterator &other) const {
607  return val != other.val;
608  }
609  iterator &operator++() {
610  val++;
611  return *this;
612  }
613  };
614 
615  /** An iterator that points to the min coordinate */
616  HALIDE_ALWAYS_INLINE iterator begin() const {
617  return {min()};
618  }
619 
620  /** An iterator that points to one past the max coordinate */
621  HALIDE_ALWAYS_INLINE iterator end() const {
622  return {min() + extent()};
623  }
624 
625  explicit Dimension(const halide_dimension_t &dim)
626  : d(dim) {
627  }
628  };
629 
630  /** Access the shape of the buffer */
631  HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
632  assert(i >= 0 && i < this->dimensions());
633  return Dimension(buf.dim[i]);
634  }
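// A short usage sketch: each Dimension is iterable, so nested range-based
// for loops visit every coordinate of an illustrative two-dimensional buffer:
//
//     Halide::Runtime::Buffer<float> im(640, 480);
//     for (int y : im.dim(1)) {
//         for (int x : im.dim(0)) {
//             im(x, y) = 0.0f;
//         }
//     }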
635 
636  /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
637  // @{
638  int min(int i) const {
639  return dim(i).min();
640  }
641  int extent(int i) const {
642  return dim(i).extent();
643  }
644  int stride(int i) const {
645  return dim(i).stride();
646  }
647  // @}
648 
649  /** The total number of elements this buffer represents. Equal to
650  * the product of the extents */
651  size_t number_of_elements() const {
652  return buf.number_of_elements();
653  }
654 
655  /** Get the dimensionality of the buffer. */
656  int dimensions() const {
657  if constexpr (has_static_dimensions) {
658  return Dims;
659  } else {
660  return buf.dimensions;
661  }
662  }
663 
664  /** Get the type of the elements. */
665  halide_type_t type() const {
666  return buf.type;
667  }
668 
669  /** A pointer to the element with the lowest address. If all
670  * strides are positive, equal to the host pointer. */
671  T *begin() const {
672  assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
673  return (T *)buf.begin();
674  }
675 
676  /** A pointer to one beyond the element with the highest address. */
677  T *end() const {
678  assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
679  return (T *)buf.end();
680  }
681 
682  /** The total number of bytes spanned by the data in memory. */
683  size_t size_in_bytes() const {
684  return buf.size_in_bytes();
685  }
686 
687  /** Reset the Buffer to be equivalent to a default-constructed Buffer
688  * of the same static type (if any); Buffer<void> will have its runtime
689  * type reset to uint8. */
690  void reset() {
691  *this = Buffer();
692  }
693 
694  Buffer()
695      : shape() {
696  buf.type = static_halide_type();
697  // If Dims is statically known, create storage for that many dimensions;
698  // otherwise, make a zero-dimensional buffer.
699  constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
700  make_static_shape_storage<buf_dimensions>();
701  }
702 
703  /** Make a Buffer from a halide_buffer_t */
704  explicit Buffer(const halide_buffer_t &buf,
705                  BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
706  assert(T_is_void || buf.type == static_halide_type());
707  initialize_from_buffer(buf, ownership);
708  }
709 
710  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
711  template<typename T2, int D2, int S2>
712  friend class Buffer;
713 
714 private:
715  template<typename T2, int D2, int S2>
716  static void static_assert_can_convert_from() {
717  static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
718  "Can't convert from a Buffer<const T> to a Buffer<T>");
719  static_assert(std::is_same<typename std::remove_const<T>::type,
720  typename std::remove_const<T2>::type>::value ||
721  T_is_void || Buffer<T2, D2, S2>::T_is_void,
722  "type mismatch constructing Buffer");
723  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
724  "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
725  }
726 
727 public:
728  static void set_default_allocate_fn(void *(*allocate_fn)(size_t)) {
729  Internal::DefaultAllocatorFns::default_allocate_fn = allocate_fn;
730  }
731  static void set_default_deallocate_fn(void (*deallocate_fn)(void *)) {
732  Internal::DefaultAllocatorFns::default_deallocate_fn = deallocate_fn;
733  }
734 
735  /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
736  * If this can be determined at compile time, fail with a static assert; otherwise
737  * return a boolean based on runtime typing. */
738  template<typename T2, int D2, int S2>
739  static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
740  static_assert_can_convert_from<T2, D2, S2>();
741  if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
742  if (other.type() != static_halide_type()) {
743  return false;
744  }
745  }
746  if (Dims != AnyDims) {
747  if (other.dimensions() != Dims) {
748  return false;
749  }
750  }
751  return true;
752  }
753 
754  /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
755  * cannot be constructed from some other Buffer type. */
756  template<typename T2, int D2, int S2>
757  static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
758  // Explicitly call static_assert_can_convert_from() here so
759  // that we always get compile-time checking, even if compiling with
760  // assertions disabled.
761  static_assert_can_convert_from<T2, D2, S2>();
762  assert(can_convert_from(other));
763  }
764 
765  /** Copy constructor. Does not copy underlying data. */
766  Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
767      : buf(other.buf),
768  alloc(other.alloc) {
769  other.incref();
770  dev_ref_count = other.dev_ref_count;
771  copy_shape_from(other.buf);
772  }
773 
774  /** Construct a Buffer from a Buffer of different dimensionality
775  * and type. Asserts that the type and dimensionality matches (at runtime,
776  * if one of the types is void). Note that this constructor is
777  * implicit. This, for example, lets you pass things like
778  * Buffer<T> or Buffer<const void> to functions expecting
779  * Buffer<const T>. */
780  template<typename T2, int D2, int S2>
781  Buffer(const Buffer<T2, D2, S2> &other)
782      : buf(other.buf),
783  alloc(other.alloc) {
784  assert_can_convert_from(other);
785  other.incref();
786  dev_ref_count = other.dev_ref_count;
787  copy_shape_from(other.buf);
788  }
789 
790  /** Move constructor */
791  Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
792      : buf(other.buf),
793  alloc(other.alloc),
794  dev_ref_count(other.dev_ref_count) {
795  other.dev_ref_count = nullptr;
796  other.alloc = nullptr;
797  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
798  }
799 
800  /** Move-construct a Buffer from a Buffer of different
801  * dimensionality and type. Asserts that the types match (at
802  * runtime if one of the types is void). */
803  template<typename T2, int D2, int S2>
804  Buffer(Buffer<T2, D2, S2> &&other)
805      : buf(other.buf),
806  alloc(other.alloc),
807  dev_ref_count(other.dev_ref_count) {
808  assert_can_convert_from(other);
809  other.dev_ref_count = nullptr;
810  other.alloc = nullptr;
811  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
812  }
813 
814  /** Assign from another Buffer of possibly-different
815  * dimensionality and type. Asserts that the types match (at
816  * runtime if one of the types is void). */
817  template<typename T2, int D2, int S2>
818  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
819  if ((const void *)this == (const void *)&other) {
820  return *this;
821  }
822  assert_can_convert_from(other);
823  other.incref();
824  decref();
825  dev_ref_count = other.dev_ref_count;
826  alloc = other.alloc;
827  free_shape_storage();
828  buf = other.buf;
829  copy_shape_from(other.buf);
830  return *this;
831  }
832 
833  /** Standard assignment operator */
834  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
835  // The cast to void* here is just to satisfy clang-tidy
836  if ((const void *)this == (const void *)&other) {
837  return *this;
838  }
839  other.incref();
840  decref();
841  dev_ref_count = other.dev_ref_count;
842  alloc = other.alloc;
843  free_shape_storage();
844  buf = other.buf;
845  copy_shape_from(other.buf);
846  return *this;
847  }
848 
849  /** Move from another Buffer of possibly-different
850  * dimensionality and type. Asserts that the types match (at
851  * runtime if one of the types is void). */
852  template<typename T2, int D2, int S2>
853  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
854  assert_can_convert_from(other);
855  decref();
856  alloc = other.alloc;
857  other.alloc = nullptr;
858  dev_ref_count = other.dev_ref_count;
859  other.dev_ref_count = nullptr;
860  free_shape_storage();
861  buf = other.buf;
862  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
863  return *this;
864  }
865 
866  /** Standard move-assignment operator */
867  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
868  decref();
869  alloc = other.alloc;
870  other.alloc = nullptr;
871  dev_ref_count = other.dev_ref_count;
872  other.dev_ref_count = nullptr;
873  free_shape_storage();
874  buf = other.buf;
875  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
876  return *this;
877  }
878 
879  /** Check that the product of the extents fits in memory. */
880  void check_overflow() {
881  size_t size = type().bytes();
882  for (int i = 0; i < dimensions(); i++) {
883  size *= dim(i).extent();
884  }
885  // We allow 2^31 or 2^63 bytes, so drop the top bit.
886  size = (size << 1) >> 1;
887  for (int i = 0; i < dimensions(); i++) {
888  size /= dim(i).extent();
889  }
890  assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
891  }
892 
893  /** Allocate memory for this Buffer. Drops the reference to any
894  * owned memory. */
895  void allocate(void *(*allocate_fn)(size_t) = nullptr,
896  void (*deallocate_fn)(void *) = nullptr) {
897  // Drop any existing allocation
898  deallocate();
899 
900  // Conservatively align images to (usually) 128 bytes. This is enough
901  // alignment for all the platforms we might use. Also ensure that the allocation
902  // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
903  constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
904 
905  const auto align_up = [=](size_t value) -> size_t {
906  return (value + alignment - 1) & ~(alignment - 1);
907  };
908 
909  size_t size = size_in_bytes();
910 
911 #if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
912  // Only use aligned_alloc() if no custom allocators are specified.
913  if (!allocate_fn && !deallocate_fn) {
914  // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
915  // on any supported platform, so we will just overallocate by 'alignment'
916  // so that the user storage also starts at an aligned point. This is a bit
917  // wasteful, but probably not a big deal.
918  static_assert(sizeof(AllocationHeader) <= alignment);
919  void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
920  assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
921  alloc = new (alloc_storage) AllocationHeader(free);
922  buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
923  return;
924  }
925  // else fall thru
926 #endif
927  if (!allocate_fn) {
928  allocate_fn = Internal::DefaultAllocatorFns::default_allocate_fn;
929  if (!allocate_fn) {
930  allocate_fn = malloc;
931  }
932  }
933  if (!deallocate_fn) {
934  deallocate_fn = Internal::DefaultAllocatorFns::default_deallocate_fn;
935  if (!deallocate_fn) {
936  deallocate_fn = free;
937  }
938  }
939 
940  static_assert(sizeof(AllocationHeader) <= alignment);
941 
942  // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
943  // make sure this is OK for AllocationHeader, since it always goes at the start
944  static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
945 
946  const size_t requested_size = align_up(size + alignment +
947  std::max(0, (int)sizeof(AllocationHeader) -
948  (int)sizeof(std::max_align_t)));
949  void *alloc_storage = allocate_fn(requested_size);
950  alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
951  uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
952  buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
953  }
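// In either branch above, the raw allocation is laid out as
//     [ AllocationHeader | padding | buf.host ... ]
// with buf.host aligned to 'alignment', so deallocate() can always recover
// and destroy the header through 'alloc'.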
954 
955  /** Drop reference to any owned host or device memory, possibly
956  * freeing it, if this buffer held the last reference to
957  * it. Retains the shape of the buffer. Does nothing if this
958  * buffer did not allocate its own memory. */
959  void deallocate() {
960  decref();
961  }
962 
963  /** Drop reference to any owned device memory, possibly freeing it
964  * if this buffer held the last reference to it. Asserts that
965  * device_dirty is false. */
966  void device_deallocate() {
967  decref(true);
968  }
969 
970  /** Allocate a new image of the given size with a runtime
971  * type. Only used when you do know what size you want but you
972  * don't know statically what type the elements are. Pass zeroes
973  * to make a buffer suitable for bounds query calls. */
974  template<typename... Args,
975  typename = typename std::enable_if<AllInts<Args...>::value>::type>
976  Buffer(halide_type_t t, int first, Args... rest) {
977  if (!T_is_void) {
978  assert(static_halide_type() == t);
979  }
980  int extents[] = {first, (int)rest...};
981  buf.type = t;
982  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
983  make_static_shape_storage<buf_dimensions>();
984  initialize_shape(extents);
985  if (!Internal::any_zero(extents)) {
986  check_overflow();
987  allocate();
988  }
989  }
990 
991  /** Allocate a new image of the given size. Pass zeroes to make a
992  * buffer suitable for bounds query calls. */
993  // @{
994 
995  // The overload with one argument is 'explicit', so that
996  // (say) int is not implicitly convertible to Buffer<int>
997  explicit Buffer(int first) {
998  static_assert(!T_is_void,
999  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1000  int extents[] = {first};
1001  buf.type = static_halide_type();
1002  constexpr int buf_dimensions = 1;
1003  make_static_shape_storage<buf_dimensions>();
1004  initialize_shape(extents);
1005  if (first != 0) {
1006  check_overflow();
1007  allocate();
1008  }
1009  }
1010 
1011  template<typename... Args,
1012  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1013  Buffer(int first, int second, Args... rest) {
1014  static_assert(!T_is_void,
1015  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1016  int extents[] = {first, second, (int)rest...};
1017  buf.type = static_halide_type();
1018  constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
1019  make_static_shape_storage<buf_dimensions>();
1020  initialize_shape(extents);
1021  if (!Internal::any_zero(extents)) {
1022  check_overflow();
1023  allocate();
1024  }
1025  }
1026  // @}
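// A minimal usage sketch of these constructors (the sizes are illustrative):
//
//     Halide::Runtime::Buffer<uint8_t> rgb(640, 480, 3);                          // allocates and owns storage
//     Halide::Runtime::Buffer<> runtime_typed(halide_type_of<float>(), 640, 480); // element type chosen at runtime
//     Halide::Runtime::Buffer<uint8_t> bounds_query(0, 0, 3);                     // zero extents: shape only, no allocation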
1027 
1028  /** Allocate a new image of unknown type using a vector of ints as the size. */
1029  Buffer(halide_type_t t, const std::vector<int> &sizes) {
1030  if (!T_is_void) {
1031  assert(static_halide_type() == t);
1032  }
1033  buf.type = t;
1034  // make_shape_storage() will do a runtime check that dimensionality matches.
1035  make_shape_storage((int)sizes.size());
1036  initialize_shape(sizes);
1037  if (!Internal::any_zero(sizes)) {
1038  check_overflow();
1039  allocate();
1040  }
1041  }
1042 
1043  /** Allocate a new image of known type using a vector of ints as the size. */
1044  explicit Buffer(const std::vector<int> &sizes)
1045  : Buffer(static_halide_type(), sizes) {
1046  }
1047 
1048 private:
1049  // Create a copy of the sizes vector, ordered as specified by order.
1050  static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1051  assert(order.size() == sizes.size());
1052  std::vector<int> ordered_sizes(sizes.size());
1053  for (size_t i = 0; i < sizes.size(); ++i) {
1054  ordered_sizes[i] = sizes.at(order[i]);
1055  }
1056  return ordered_sizes;
1057  }
1058 
1059 public:
1060  /** Allocate a new image of unknown type using a vector of ints as the size and
1061  * a vector of indices indicating the storage order for each dimension. The
1062  * length of the sizes vector and the storage-order vector must match. For instance,
1063  * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1064  Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1065  : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1066  transpose(storage_order);
1067  }
1068 
1069  Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1070  : Buffer(static_halide_type(), sizes, storage_order) {
1071  }
1072 
1073  /** Make a Buffer that refers to a statically sized array. Does not
1074  * take ownership of the data, and does not set the host_dirty flag. */
1075  template<typename Array, size_t N>
1076  explicit Buffer(Array (&vals)[N]) {
1077  const int buf_dimensions = dimensionality_of_array(vals);
1078  buf.type = scalar_type_of_array(vals);
1079  buf.host = (uint8_t *)vals;
1080  make_shape_storage(buf_dimensions);
1081  initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1082  }
1083 
1084  /** Initialize a Buffer of runtime type from a pointer and some
1085  * sizes. Assumes dense row-major packing and a min coordinate of
1086  * zero. Does not take ownership of the data and does not set the
1087  * host_dirty flag. */
1088  template<typename... Args,
1089  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1090  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1091  if (!T_is_void) {
1092  assert(static_halide_type() == t);
1093  }
1094  int extents[] = {first, (int)rest...};
1095  buf.type = t;
1096  buf.host = (uint8_t *)const_cast<void *>(data);
1097  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1098  make_static_shape_storage<buf_dimensions>();
1099  initialize_shape(extents);
1100  }
1101 
1102  /** Initialize a Buffer from a pointer and some sizes. Assumes
1103  * dense row-major packing and a min coordinate of zero. Does not
1104  * take ownership of the data and does not set the host_dirty flag. */
1105  template<typename... Args,
1106  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1107  explicit Buffer(T *data, int first, Args &&...rest) {
1108  int extents[] = {first, (int)rest...};
1109  buf.type = static_halide_type();
1110  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1111  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1112  make_static_shape_storage<buf_dimensions>();
1113  initialize_shape(extents);
1114  }
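// A minimal sketch of wrapping memory owned elsewhere (the std::vector here
// is just an illustration; any dense row-major storage works):
//
//     std::vector<float> storage(640 * 480);
//     Halide::Runtime::Buffer<float> im(storage.data(), 640, 480);
//     // im does not own the data; 'storage' must outlive it.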
1115 
1116  /** Initialize a Buffer from a pointer and a vector of
1117  * sizes. Assumes dense row-major packing and a min coordinate of
1118  * zero. Does not take ownership of the data and does not set the
1119  * host_dirty flag. */
1120  explicit Buffer(T *data, const std::vector<int> &sizes) {
1121  buf.type = static_halide_type();
1122  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1123  make_shape_storage((int)sizes.size());
1124  initialize_shape(sizes);
1125  }
1126 
1127  /** Initialize a Buffer of runtime type from a pointer and a
1128  * vector of sizes. Assumes dense row-major packing and a min
1129  * coordinate of zero. Does not take ownership of the data and
1130  * does not set the host_dirty flag. */
1131  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1132  if (!T_is_void) {
1133  assert(static_halide_type() == t);
1134  }
1135  buf.type = t;
1136  buf.host = (uint8_t *)const_cast<void *>(data);
1137  make_shape_storage((int)sizes.size());
1138  initialize_shape(sizes);
1139  }
1140 
1141  /** Initialize a Buffer from a pointer to the min coordinate and
1142  * an array describing the shape. Does not take ownership of the
1143  * data, and does not set the host_dirty flag. */
1144  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1145  if (!T_is_void) {
1146  assert(static_halide_type() == t);
1147  }
1148  buf.type = t;
1149  buf.host = (uint8_t *)const_cast<void *>(data);
1150  make_shape_storage(d);
1151  for (int i = 0; i < d; i++) {
1152  buf.dim[i] = shape[i];
1153  }
1154  }
1155 
1156  /** Initialize a Buffer from a pointer to the min coordinate and
1157  * a vector describing the shape. Does not take ownership of the
1158  * data, and does not set the host_dirty flag. */
1159  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1160  const std::vector<halide_dimension_t> &shape)
1161  : Buffer(t, data, (int)shape.size(), shape.data()) {
1162  }
1163 
1164  /** Initialize a Buffer from a pointer to the min coordinate and
1165  * an array describing the shape. Does not take ownership of the
1166  * data and does not set the host_dirty flag. */
1167  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1168  buf.type = static_halide_type();
1169  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1170  make_shape_storage(d);
1171  for (int i = 0; i < d; i++) {
1172  buf.dim[i] = shape[i];
1173  }
1174  }
1175 
1176  /** Initialize a Buffer from a pointer to the min coordinate and
1177  * a vector describing the shape. Does not take ownership of the
1178  * data, and does not set the host_dirty flag. */
1179  explicit Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1180  : Buffer(data, (int)shape.size(), shape.data()) {
1181  }
1182 
1183  /** Destructor. Will release any underlying owned allocation if
1184  * this is the last reference to it. Will assert fail if there are
1185  * weak references to this Buffer outstanding. */
1186  ~Buffer() {
1187  decref();
1188  free_shape_storage();
1189  }
1190 
1191  /** Get a pointer to the raw halide_buffer_t this wraps. */
1192  // @{
1193  halide_buffer_t *raw_buffer() {
1194  return &buf;
1195  }
1196 
1197  const halide_buffer_t *raw_buffer() const {
1198  return &buf;
1199  }
1200  // @}
1201 
1202  /** Provide a cast operator to halide_buffer_t *, so that
1203  * instances can be passed directly to Halide filters. */
1204  operator halide_buffer_t *() {
1205  return &buf;
1206  }
1207 
1208  /** Return a typed reference to this Buffer. Useful for converting
1209  * a reference to a Buffer<void> to a reference to, for example, a
1210  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1211  * You can also optionally specify a new value for Dims; this is useful
1212  * mainly for removing the dimensionality constraint on a Buffer with
1213  * explicit dimensionality. Does a runtime assert if the source buffer type
1214  * is void or the new dimensionality is incompatible. */
1215  template<typename T2, int D2 = Dims>
1216  HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1217  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1218  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1219  }
1220 
1221  /** Return a const typed reference to this Buffer. Useful for converting
1222  * a reference to a Buffer<void> to a reference to, for example, a
1223  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1224  * You can also optionally specify a new value for Dims; this is useful
1225  * mainly for removing the dimensionality constraint on a Buffer with
1226  * explicit dimensionality. Does a runtime assert if the source buffer type
1227  * is void or the new dimensionality is incompatible. */
1228  template<typename T2, int D2 = Dims>
1229  HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1230  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1231  return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1232  }
1233 
1234  /** Return an rval reference to this Buffer. Useful for converting
1235  * a reference to a Buffer<void> to a reference to, for example, a
1236  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1237  * You can also optionally specify a new value for Dims; this is useful
1238  * mainly for removing the dimensionality constraint on a Buffer with
1239  * explicit dimensionality. Does a runtime assert if the source buffer type
1240  * is void or the new dimensionality is incompatible. */
1241  template<typename T2, int D2 = Dims>
1242  HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1243  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1244  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1245  }
1246 
1247  /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1248  * to recapitulate the type argument. */
1249  // @{
1250  HALIDE_ALWAYS_INLINE
1251  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1252  // Note that we can skip the assert_can_convert_from(), since T -> const T
1253  // conversion is always legal.
1254  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1255  }
1256 
1257  HALIDE_ALWAYS_INLINE
1258  const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1259  return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1260  }
1261 
1262  HALIDE_ALWAYS_INLINE
1263  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1264  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1265  }
1266  // @}
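// A short sketch of reinterpreting a type-erased buffer without copying; the
// runtime type must really be float here or as<>() will assert (the helper
// make_some_buffer() is hypothetical):
//
//     Halide::Runtime::Buffer<void> erased = make_some_buffer();
//     Halide::Runtime::Buffer<float> &typed = erased.as<float>();
//     Halide::Runtime::Buffer<const float> &read_only = typed.as_const();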
1267 
1268  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1269  * passing arguments */
1270  template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1271  operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1272  return as_const();
1273  }
1274 
1275  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1276  * passing arguments */
1277  template<typename TVoid,
1278  typename T2 = T,
1279  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1280  !std::is_void<T2>::value &&
1281  !std::is_const<T2>::value>::type>
1282  operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1283  return as<TVoid, Dims>();
1284  }
1285 
1286  /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1287  * passing arguments */
1288  template<typename TVoid,
1289  typename T2 = T,
1290  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1291  !std::is_void<T2>::value &&
1292  std::is_const<T2>::value>::type>
1293  operator Buffer<const TVoid, Dims, InClassDimStorage> &() & {
1294  return as<const TVoid, Dims>();
1295  }
1296 
1297  /** Conventional names for the first three dimensions. */
1298  // @{
1299  int width() const {
1300  return (dimensions() > 0) ? dim(0).extent() : 1;
1301  }
1302  int height() const {
1303  return (dimensions() > 1) ? dim(1).extent() : 1;
1304  }
1305  int channels() const {
1306  return (dimensions() > 2) ? dim(2).extent() : 1;
1307  }
1308  // @}
1309 
1310  /** Conventional names for the min and max value of each dimension */
1311  // @{
1312  int left() const {
1313  return dim(0).min();
1314  }
1315 
1316  int right() const {
1317  return dim(0).max();
1318  }
1319 
1320  int top() const {
1321  return dim(1).min();
1322  }
1323 
1324  int bottom() const {
1325  return dim(1).max();
1326  }
1327  // @}
1328 
1329  /** Make a new image which is a deep copy of this image. Use crop
1330  * or slice followed by copy to make a copy of only a portion of
1331  * the image. The new image has the same nesting order of dimensions
1332  * (e.g. channels innermost), but resets the strides to the default
1333  * (each stride is the product of the extents of the inner dimensions).
1334  * Note that this means any strides of zero get broadcast into a non-zero stride.
1335  *
1336  * Note that the returned Buffer is always of a non-const type T (ie:
1337  *
1338  * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1339  *
1340  * which is always safe, since we are making a deep copy. (The caller
1341  * can easily cast it back to Buffer<const T> if desired, which is
1342  * always safe and free.)
1343  */
1344  Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1345  void (*deallocate_fn)(void *) = nullptr) const {
1346  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1347  dst.copy_from(*this);
1348  return dst;
1349  }
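// A short usage sketch: copy() produces an owning, densely-strided deep copy,
// and the result is non-const even when the source is Buffer<const T>:
//
//     void snapshot(const Halide::Runtime::Buffer<const uint8_t> &src) {
//         Halide::Runtime::Buffer<uint8_t> dst = src.copy();
//         // dst owns its storage and can be modified freely.
//     }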
1350 
1351  /** Like copy(), but the copy is created in interleaved memory layout
1352  * (vs. keeping the same memory layout as the original). Requires that 'this'
1353  * has exactly 3 dimensions.
1354  */
1355  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1356                                                                   void (*deallocate_fn)(void *) = nullptr) const {
1357  static_assert(Dims == AnyDims || Dims == 3);
1358  assert(dimensions() == 3);
1359  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1360  dst.set_min(min(0), min(1), min(2));
1361  dst.allocate(allocate_fn, deallocate_fn);
1362  dst.copy_from(*this);
1363  return dst;
1364  }
1365 
1366  /** Like copy(), but the copy is created in planar memory layout
1367  * (vs. keeping the same memory layout as the original).
1368  */
1369  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1370  void (*deallocate_fn)(void *) = nullptr) const {
1371  std::vector<int> mins, extents;
1372  const int dims = dimensions();
1373  mins.reserve(dims);
1374  extents.reserve(dims);
1375  for (int d = 0; d < dims; ++d) {
1376  mins.push_back(dim(d).min());
1377  extents.push_back(dim(d).extent());
1378  }
1379  Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), nullptr, extents);
1380  dst.set_min(mins);
1381  dst.allocate(allocate_fn, deallocate_fn);
1382  dst.copy_from(*this);
1383  return dst;
1384  }
1385 
1386  /** Make a copy of the Buffer which shares the underlying host and/or device
1387  * allocations as the existing Buffer. This is purely syntactic sugar for
1388  * cases where you have a const reference to a Buffer but need a temporary
1389  * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1390  * inline way to create a temporary. \code
1391  * void call_my_func(const Buffer<const uint8_t>& input) {
1392  * my_func(input.alias(), output);
1393  * }\endcode
1394  */
1395  Buffer<T, Dims, InClassDimStorage> alias() const {
1396  return *this;
1397  }
1398 
1399  /** Fill a Buffer with the values at the same coordinates in
1400  * another Buffer. Restricts itself to coordinates contained
1401  * within the intersection of the two buffers. If the two Buffers
1402  * are not in the same coordinate system, you will need to
1403  * translate the argument Buffer first. E.g. if you're blitting a
1404  * sprite onto a framebuffer, you'll want to translate the sprite
1405  * to the correct location first like so: \code
1406  * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1407  */
1408  template<typename T2, int D2, int S2>
1409  void copy_from(Buffer<T2, D2, S2> src) {
1410  static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1411  assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1412  assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1413 
1414  Buffer<T, Dims, InClassDimStorage> dst(*this);
1415 
1416  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1417  assert(src.dimensions() == dst.dimensions());
1418 
1419  // Trim the copy to the region in common
1420  const int d = dimensions();
1421  for (int i = 0; i < d; i++) {
1422  int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1423  int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1424  if (max_coord < min_coord) {
1425  // The buffers do not overlap.
1426  return;
1427  }
1428  dst.crop(i, min_coord, max_coord - min_coord + 1);
1429  src.crop(i, min_coord, max_coord - min_coord + 1);
1430  }
1431 
1432  // If T is void, we need to do runtime dispatch to an
1433  // appropriately-typed lambda. We're copying, so we only care
1434  // about the element size. (If not, this should optimize away
1435  // into a static dispatch to the right-sized copy.)
1436  if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1437  using MemType = uint8_t;
1438  auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1439  auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1440  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1441  } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1442  using MemType = uint16_t;
1443  auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1444  auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1445  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1446  } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1447  using MemType = uint32_t;
1448  auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1449  auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1450  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1451  } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1452  using MemType = uint64_t;
1453  auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
1454  auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
1455  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1456  } else {
1457  assert(false && "type().bytes() must be 1, 2, 4, or 8");
1458  }
1459  set_host_dirty();
1460  }
1461 
1462  /** Make an image that refers to a sub-range of this image along
1463  * the given dimension. Asserts that the crop region is within
1464  * the existing bounds: you cannot "crop outwards", even if you know there
1465  * is valid Buffer storage (e.g. because you already cropped inwards). */
1466  Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1467  // Make a fresh copy of the underlying buffer (but not a fresh
1468  // copy of the allocation, if there is one).
1469  Buffer<T, Dims, InClassDimStorage> im = *this;
1470 
1471  // This guarantees the preexisting device ref is dropped if the
1472  // device_crop call fails and maintains the buffer in a consistent
1473  // state.
1474  im.device_deallocate();
1475 
1476  im.crop_host(d, min, extent);
1477  if (buf.device_interface != nullptr) {
1478  complete_device_crop(im);
1479  }
1480  return im;
1481  }
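// A short usage sketch: taking a 64x64 window whose top-left corner is at
// (32, 16) in an existing two-dimensional buffer 'im', without copying pixels:
//
//     Halide::Runtime::Buffer<uint8_t> tile = im.cropped(0, 32, 64).cropped(1, 16, 64);
//     // tile aliases im's storage; its dim(0) now has min 32 and extent 64.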
1482 
1483  /** Crop an image in-place along the given dimension. This does
1484  * not move any data around in memory - it just changes the min
1485  * and extent of the given dimension. */
1486  void crop(int d, int min, int extent) {
1487  // An optimization for non-device buffers. For the device case,
1488  // a temp buffer is required, so reuse the not-in-place version.
1489  // TODO(zalman|abadams): Are nop crops common enough to special
1490  // case the device part of the if to do nothing?
1491  if (buf.device_interface != nullptr) {
1492  *this = cropped(d, min, extent);
1493  } else {
1494  crop_host(d, min, extent);
1495  }
1496  }
1497 
1498  /** Make an image that refers to a sub-rectangle of this image along
1499  * the first N dimensions. Asserts that the crop region is within
1500  * the existing bounds. The cropped image may drop any device handle
1501  * if the device_interface cannot accomplish the crop in-place. */
1502  Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1503  // Make a fresh copy of the underlying buffer (but not a fresh
1504  // copy of the allocation, if there is one).
1505  Buffer<T, Dims, InClassDimStorage> im = *this;
1506 
1507  // This guarantees the preexisting device ref is dropped if the
1508  // device_crop call fails and maintains the buffer in a consistent
1509  // state.
1510  im.device_deallocate();
1511 
1512  im.crop_host(rect);
1513  if (buf.device_interface != nullptr) {
1514  complete_device_crop(im);
1515  }
1516  return im;
1517  }
1518 
1519  /** Crop an image in-place along the first N dimensions. This does
1520  * not move any data around in memory, nor does it free memory. It
1521  * just rewrites the min/extent of each dimension to refer to a
1522  * subregion of the same allocation. */
1523  void crop(const std::vector<std::pair<int, int>> &rect) {
1524  // An optimization for non-device buffers. For the device case,
1525  // a temp buffer is required, so reuse the not-in-place version.
1526  // TODO(zalman|abadams): Are nop crops common enough to special
1527  // case the device part of the if to do nothing?
1528  if (buf.device_interface != nullptr) {
1529  *this = cropped(rect);
1530  } else {
1531  crop_host(rect);
1532  }
1533  }
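// Usage sketch (illustrative): each pair in the rect is (min, extent) for the
// corresponding dimension, so this restricts x to [10, 29] and y to [5, 14].
//
//   im.crop({{10, 20}, {5, 10}});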
1534 
1535  /** Make an image which refers to the same data using
1536  * translated coordinates in the given dimension. Positive values
1537  * move the image data to the right or down relative to the
1538  * coordinate system. Drops any device handle. */
1539  Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1540  Buffer<T, Dims, InClassDimStorage> im = *this;
1541  im.translate(d, dx);
1542  return im;
1543  }
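// Usage sketch (illustrative): after translating by +5 in x, the element that
// was addressed at coordinate x is addressed at x + 5 in the new view.
//
//   Buffer<float> im2 = im.translated(0, 5);
//   assert(&im2(x + 5, y) == &im(x, y));  // for any in-range (x, y) of im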
1544 
1545  /** Translate an image in-place along one dimension by changing
1546  * how it is indexed. Does not move any data around in memory. */
1547  void translate(int d, int delta) {
1548  assert(d >= 0 && d < this->dimensions());
1549  device_deallocate();
1550  buf.dim[d].min += delta;
1551  }
1552 
1553  /** Make an image which refers to the same data translated along
1554  * the first N dimensions. */
1555  Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1556  Buffer<T, Dims, InClassDimStorage> im = *this;
1557  im.translate(delta);
1558  return im;
1559  }
1560 
1561  /** Translate an image along the first N dimensions by changing
1562  * how it is indexed. Does not move any data around in memory. */
1563  void translate(const std::vector<int> &delta) {
1564  device_deallocate();
1565  assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1566  int limit = (int)delta.size();
1567  assert(limit <= dimensions());
1568  for (int i = 0; i < limit; i++) {
1569  translate(i, delta[i]);
1570  }
1571  }
1572 
1573  /** Set the min coordinate of an image in the first N dimensions. */
1574  // @{
1575  void set_min(const std::vector<int> &mins) {
1576  assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1577  device_deallocate();
1578  for (size_t i = 0; i < mins.size(); i++) {
1579  buf.dim[i].min = mins[i];
1580  }
1581  }
1582 
1583  template<typename... Args>
1584  void set_min(Args... args) {
1585  set_min(std::vector<int>{args...});
1586  }
1587  // @}
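// Usage sketch (illustrative): re-origin a 2-D buffer so its first coordinate
// is (10, 20) instead of (0, 0). No data moves.
//
//   im.set_min(10, 20);
//   assert(im.dim(0).min() == 10 && im.dim(1).min() == 20);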
1588 
1589  /** Test if a given coordinate is within the bounds of an image. */
1590  // @{
1591  bool contains(const std::vector<int> &coords) const {
1592  assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1593  for (size_t i = 0; i < coords.size(); i++) {
1594  if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1595  return false;
1596  }
1597  }
1598  return true;
1599  }
1600 
1601  template<typename... Args>
1602  bool contains(Args... args) const {
1603  return contains(std::vector<int>{args...});
1604  }
1605  // @}
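// Usage sketch (illustrative): contains() is convenient for guarding
// neighborhood accesses near the image border.
//
//   if (im.contains(x + 1, y)) {
//       sum += im(x + 1, y);
//   }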
1606 
1607  /** Make a buffer which refers to the same data in the same layout
1608  * using a swapped indexing order for the dimensions given. So
1609  * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1610  * strongly that A.address_of(i, j) == B.address_of(j, i). */
1611  Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1612  Buffer<T, Dims, InClassDimStorage> im = *this;
1613  im.transpose(d1, d2);
1614  return im;
1615  }
1616 
1617  /** Transpose a buffer in-place by changing how it is indexed. For
1618  * example, transpose(0, 1) on a two-dimensional buffer means that
1619  * the value referred to by coordinates (i, j) is now reached at
1620  * the coordinates (j, i), and vice versa. This is done by
1621  * reordering the per-dimension metadata rather than by moving
1622  * data around in memory, so other views of the same memory will
1623  * not see the data as having been transposed. */
1624  void transpose(int d1, int d2) {
1625  assert(d1 >= 0 && d1 < this->dimensions());
1626  assert(d2 >= 0 && d2 < this->dimensions());
1627  std::swap(buf.dim[d1], buf.dim[d2]);
1628  }
1629 
1630  /** A generalized transpose: instead of swapping two dimensions,
1631  * pass a vector that lists each dimension index exactly once, in
1632  * the desired order. This does not move any data around in memory
1633  * - it just permutes how it is indexed. */
1634  void transpose(const std::vector<int> &order) {
1635  assert((int)order.size() == dimensions());
1636  if (dimensions() < 2) {
1637  // My, that was easy
1638  return;
1639  }
1640 
1641  std::vector<int> order_sorted = order;
1642  for (size_t i = 1; i < order_sorted.size(); i++) {
1643  for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1644  std::swap(order_sorted[j], order_sorted[j - 1]);
1645  transpose(j, j - 1);
1646  }
1647  }
1648  }
1649 
1650  /** Make a buffer which refers to the same data in the same
1651  * layout using a different ordering of the dimensions. */
1652  Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1653  Buffer<T, Dims, InClassDimStorage> im = *this;
1654  im.transpose(order);
1655  return im;
1656  }
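// Usage sketch (illustrative): view the same matrix storage with the two
// coordinates exchanged, without copying any data.
//
//   Buffer<float> a(640, 480);
//   Buffer<float> at = a.transposed(0, 1);
//   assert(&at(j, i) == &a(i, j));  // for any in-range (i, j) of a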
1657 
1658  /** Make a lower-dimensional buffer that refers to one slice of
1659  * this buffer. */
1660  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1661  sliced(int d, int pos) const {
1662  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1663  assert(dimensions() > 0);
1664 
1664 
1665  Buffer<T, AnyDims, InClassDimStorage> im = *this;
1666 
1667  // This guarantees the preexisting device ref is dropped if the
1668  // device_slice call fails and maintains the buffer in a consistent
1669  // state.
1670  im.device_deallocate();
1671 
1672  im.slice_host(d, pos);
1673  if (buf.device_interface != nullptr) {
1674  complete_device_slice(im, d, pos);
1675  }
1676  return im;
1677  }
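// Usage sketch (illustrative): take a 2-D view of one channel of a 3-D image.
//
//   Buffer<float> rgb(640, 480, 3);
//   Buffer<float> green = rgb.sliced(2, 1);  // now 2-dimensional
//   assert(&green(x, y) == &rgb(x, y, 1));   // for any in-range (x, y)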
1678 
1679  /** Make a lower-dimensional buffer that refers to one slice of this
1680  * buffer at the dimension's minimum. */
1681  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1682  sliced(int d) const {
1683  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1684  assert(dimensions() > 0);
1685 
1686  return sliced(d, dim(d).min());
1687  }
1688 
1689  /** Rewrite the buffer to refer to a single lower-dimensional
1690  * slice of itself along the given dimension at the given
1691  * coordinate. Does not move any data around or free the original
1692  * memory, so other views of the same data are unaffected. Can
1693  * only be called on a Buffer with dynamic dimensionality. */
1694  void slice(int d, int pos) {
1695  static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1696  assert(dimensions() > 0);
1697 
1698  // An optimization for non-device buffers. For the device case,
1699  // a temp buffer is required, so reuse the not-in-place version.
1700  // TODO(zalman|abadams): Are nop slices common enough to special
1701  // case the device part of the if to do nothing?
1702  if (buf.device_interface != nullptr) {
1703  *this = sliced(d, pos);
1704  } else {
1705  slice_host(d, pos);
1706  }
1707  }
1708 
1709  /** Slice a buffer in-place at the dimension's minimum. */
1710  void slice(int d) {
1711  slice(d, dim(d).min());
1712  }
1713 
1714  /** Make a new buffer that views this buffer as a single slice in a
1715  * higher-dimensional space. The new dimension has extent one and
1716  * the given min. This operation is the opposite of slice. As an
1717  * example, the following condition is true:
1718  *
1719  \code
1720  im2 = im.embedded(1, 17);
1721  &im(x, y, c) == &im2(x, 17, y, c);
1722  \endcode
1723  */
1724  Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1725  embedded(int d, int pos = 0) const {
1726  Buffer<T, AnyDims, InClassDimStorage> im(*this);
1727  im.embed(d, pos);
1728  return im;
1729  }
1730 
1731  /** Embed a buffer in-place, increasing the
1732  * dimensionality. */
1733  void embed(int d, int pos = 0) {
1734  static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1735  assert(d >= 0 && d <= dimensions());
1736  add_dimension();
1737  translate(dimensions() - 1, pos);
1738  for (int i = dimensions() - 1; i > d; i--) {
1739  transpose(i, i - 1);
1740  }
1741  }
1742 
1743  /** Add a new dimension with a min of zero and an extent of
1744  * one. The stride is the extent of the outermost dimension times
1745  * its stride. The new dimension is the last dimension. This is a
1746  * special case of embed. */
1747  void add_dimension() {
1748  static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1749  const int dims = buf.dimensions;
1750  buf.dimensions++;
1751  if (buf.dim != shape) {
1752  // We're already on the heap. Reallocate.
1753  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1754  for (int i = 0; i < dims; i++) {
1755  new_shape[i] = buf.dim[i];
1756  }
1757  delete[] buf.dim;
1758  buf.dim = new_shape;
1759  } else if (dims == InClassDimStorage) {
1760  // Transition from the in-class storage to the heap
1761  make_shape_storage(buf.dimensions);
1762  for (int i = 0; i < dims; i++) {
1763  buf.dim[i] = shape[i];
1764  }
1765  } else {
1766  // We still fit in the class
1767  }
1768  buf.dim[dims] = {0, 1, 0};
1769  if (dims == 0) {
1770  buf.dim[dims].stride = 1;
1771  } else {
1772  buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1773  }
1774  }
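// Shape sketch (illustrative): for a dense 100x100 buffer the new unit-extent
// dimension is appended with stride 100 * 100.
//
//   Buffer<float> im(100, 100);
//   im.add_dimension();
//   assert(im.dimensions() == 3);
//   assert(im.dim(2).extent() == 1 && im.dim(2).stride() == 100 * 100);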
1775 
1776  /** Add a new dimension with a min of zero, an extent of one, and
1777  * the specified stride. The new dimension is the last
1778  * dimension. This is a special case of embed. */
1779  void add_dimension_with_stride(int s) {
1780  add_dimension();
1781  buf.dim[buf.dimensions - 1].stride = s;
1782  }
1783 
1784  /** Methods for managing any GPU allocation. */
1785  // @{
1786  // Set the host dirty flag. Called by every operator()
1787  // access. Must be inlined so it can be hoisted out of loops.
1788  HALIDE_ALWAYS_INLINE
1789  void set_host_dirty(bool v = true) {
1790  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1791  buf.set_host_dirty(v);
1792  }
1793 
1794  // Check if the device allocation is dirty. Called by
1795  // set_host_dirty, which is called by every accessor. Must be
1796  // inlined so it can be hoisted out of loops.
1797  HALIDE_ALWAYS_INLINE
1798  bool device_dirty() const {
1799  return buf.device_dirty();
1800  }
1801 
1802  bool host_dirty() const {
1803  return buf.host_dirty();
1804  }
1805 
1806  void set_device_dirty(bool v = true) {
1807  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1808  buf.set_device_dirty(v);
1809  }
1810 
1811  int copy_to_host(void *ctx = nullptr) {
1812  if (device_dirty()) {
1813  return buf.device_interface->copy_to_host(ctx, &buf);
1814  }
1815  return halide_error_code_success;
1816  }
1817 
1818  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1819  if (host_dirty()) {
1820  return device_interface->copy_to_device(ctx, &buf, device_interface);
1821  }
1822  return halide_error_code_success;
1823  }
1824 
1825  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1826  return device_interface->device_malloc(ctx, &buf, device_interface);
1827  }
1828 
1829  int device_free(void *ctx = nullptr) {
1830  if (dev_ref_count) {
1831  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1832  "Can't call device_free on an unmanaged or wrapped native device handle. "
1833  "Free the source allocation or call device_detach_native instead.");
1834  // Multiple people may be holding onto this dev field
1835  assert(dev_ref_count->count == 1 &&
1836  "Multiple Halide::Runtime::Buffer objects share this device "
1837  "allocation. Freeing it would create dangling references. "
1838  "Don't call device_free on Halide buffers that you have copied or "
1839  "passed by value.");
1840  }
1841  int ret = halide_error_code_success;
1842  if (buf.device_interface) {
1843  ret = buf.device_interface->device_free(ctx, &buf);
1844  }
1845  if (dev_ref_count) {
1846  delete dev_ref_count;
1847  dev_ref_count = nullptr;
1848  }
1849  return ret;
1850  }
1851 
1852  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1853  uint64_t handle, void *ctx = nullptr) {
1854  assert(device_interface);
1855  dev_ref_count = new DeviceRefCount;
1856  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1857  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1858  }
1859 
1860  int device_detach_native(void *ctx = nullptr) {
1861  assert(dev_ref_count &&
1862  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1863  "Only call device_detach_native on buffers wrapping a native "
1864  "device handle via device_wrap_native. This buffer was allocated "
1865  "using device_malloc, or is unmanaged. "
1866  "Call device_free or free the original allocation instead.");
1867  // Multiple people may be holding onto this dev field
1868  assert(dev_ref_count->count == 1 &&
1869  "Multiple Halide::Runtime::Buffer objects share this device "
1870  "allocation. Freeing it could create dangling references. "
1871  "Don't call device_detach_native on Halide buffers that you "
1872  "have copied or passed by value.");
1873  int ret = halide_error_code_success;
1874  if (buf.device_interface) {
1875  ret = buf.device_interface->detach_native(ctx, &buf);
1876  }
1877  delete dev_ref_count;
1878  dev_ref_count = nullptr;
1879  return ret;
1880  }
1881 
1882  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1883  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1884  }
1885 
1886  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1887  if (dev_ref_count) {
1888  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1889  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1890  "Free the source allocation or call device_detach_native instead.");
1891  // Multiple people may be holding onto this dev field
1892  assert(dev_ref_count->count == 1 &&
1893  "Multiple Halide::Runtime::Buffer objects share this device "
1894  "allocation. Freeing it would create dangling references. "
1895  "Don't call device_and_host_free on Halide buffers that you have copied or "
1896  "passed by value.");
1897  }
1898  int ret = halide_error_code_success;
1899  if (buf.device_interface) {
1900  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1901  }
1902  if (dev_ref_count) {
1903  delete dev_ref_count;
1904  dev_ref_count = nullptr;
1905  }
1906  return ret;
1907  }
1908 
1909  int device_sync(void *ctx = nullptr) {
1910  return buf.device_sync(ctx);
1911  }
1912 
1913  bool has_device_allocation() const {
1914  return buf.device != 0;
1915  }
1916 
1917  /** Return the method by which the device field is managed. */
1918  BufferDeviceOwnership device_ownership() const {
1919  if (dev_ref_count == nullptr) {
1920  return BufferDeviceOwnership::Allocated;
1921  }
1922  return dev_ref_count->ownership;
1923  }
1924  // @}
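// Typical host/device round trip (sketch; assumes the CUDA runtime is linked
// in and HalideRuntimeCuda.h is included for halide_cuda_device_interface()):
//
//   Buffer<float> im(256, 256);
//   im.fill(0.0f);                                      // marks host dirty
//   im.copy_to_device(halide_cuda_device_interface());  // copies, clears host dirty
//   // ... run a pipeline that writes im on the GPU, marking device dirty ...
//   im.copy_to_host();                                  // copies back only if device dirty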
1925 
1926  /** If you use the (x, y, c) indexing convention, then Halide
1927  * Buffers are stored planar by default. This function constructs
1928  * an interleaved RGB or RGBA image that can still be indexed
1929  * using (x, y, c). Passing it to a generator requires that the
1930  * generator has been compiled with support for interleaved (also
1931  * known as packed or chunky) memory layouts. */
1932  static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1933  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1934  Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1935  // Note that this is equivalent to calling transpose({2, 0, 1}),
1936  // but slightly more efficient.
1937  im.transpose(0, 1);
1938  im.transpose(1, 2);
1939  return im;
1940  }
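// Resulting layout (sketch): channels become the innermost dimension, so
// stepping in x skips over all channels of a pixel.
//
//   auto im = Buffer<uint8_t>::make_interleaved(640, 480, 3);
//   assert(im.dim(2).stride() == 1);        // c is dense
//   assert(im.dim(0).stride() == 3);        // x steps over the 3 channels
//   assert(im.dim(1).stride() == 640 * 3);  // y steps over a whole row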
1941 
1942  /** If you use the (x, y, c) indexing convention, then Halide
1943  * Buffers are stored planar by default. This function constructs
1944  * an interleaved RGB or RGBA image that can still be indexed
1945  * using (x, y, c). Passing it to a generator requires that the
1946  * generator has been compiled with support for interleaved (also
1947  * known as packed or chunky) memory layouts. */
1948  static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1949  return make_interleaved(static_halide_type(), width, height, channels);
1950  }
1951 
1952  /** Wrap an existing interleaved image. */
1953  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1954  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1955  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1956  Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1957  im.transpose(0, 1);
1958  im.transpose(1, 2);
1959  return im;
1960  }
1961 
1962  /** Wrap an existing interleaved image. */
1963  static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1964  return make_interleaved(static_halide_type(), data, width, height, channels);
1965  }
1966 
1967  /** Make a zero-dimensional Buffer */
1968  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1969  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1970  Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1971  buf.slice(0, 0);
1972  return buf;
1973  }
1974 
1975  /** Make a zero-dimensional Buffer */
1976  static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1977  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1978  Buffer<T, AnyDims, InClassDimStorage> buf(1);
1979  buf.slice(0, 0);
1980  return buf;
1981  }
1982 
1983  /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1984  static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1985  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1986  Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1987  buf.slice(0, 0);
1988  return buf;
1989  }
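// Usage sketch (illustrative): a zero-dimensional Buffer holds a single value
// and is indexed with no arguments.
//
//   auto s = Buffer<float>::make_scalar();
//   s() = 3.0f;
//   assert(s.dimensions() == 0 && s() == 3.0f);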
1990 
1991  /** Make a buffer with the same shape and memory nesting order as
1992  * another buffer. It may have a different type. */
1993  template<typename T2, int D2, int S2>
1994  static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1995  void *(*allocate_fn)(size_t) = nullptr,
1996  void (*deallocate_fn)(void *) = nullptr) {
1997  static_assert(Dims == D2 || Dims == AnyDims);
1998  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1999  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
2000  allocate_fn, deallocate_fn);
2001  }
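// Usage sketch (illustrative): allocate a float buffer with the same extents
// and the same (here interleaved) memory nesting order as an existing image.
//
//   auto src = Buffer<uint8_t>::make_interleaved(640, 480, 3);
//   Buffer<float> dst = Buffer<float>::make_with_shape_of(src);
//   assert(dst.dim(2).stride() == 1);  // the channel dimension stays innermost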
2002 
2003 private:
2004  static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
2005  int dimensions,
2006  halide_dimension_t *shape,
2007  void *(*allocate_fn)(size_t),
2008  void (*deallocate_fn)(void *)) {
2009  // Reorder the dimensions of src to have strides in increasing order
2010  std::vector<int> swaps;
2011  for (int i = dimensions - 1; i > 0; i--) {
2012  for (int j = i; j > 0; j--) {
2013  if (shape[j - 1].stride > shape[j].stride) {
2014  std::swap(shape[j - 1], shape[j]);
2015  swaps.push_back(j);
2016  }
2017  }
2018  }
2019 
2020  // Rewrite the strides to be dense (this mutates the shape we were
2021  // passed, which is why make_with_shape_of takes src by value).
2022  for (int i = 0; i < dimensions; i++) {
2023  if (i == 0) {
2024  shape[i].stride = 1;
2025  } else {
2026  shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2027  }
2028  }
2029 
2030  // Undo the dimension reordering
2031  while (!swaps.empty()) {
2032  int j = swaps.back();
2033  std::swap(shape[j - 1], shape[j]);
2034  swaps.pop_back();
2035  }
2036 
2037  // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2038  // using this method with Buffer<void> for either src or dst.
2039  Buffer<> dst(dst_type, nullptr, dimensions, shape);
2040  dst.allocate(allocate_fn, deallocate_fn);
2041 
2042  return dst;
2043  }
2044 
2045  template<typename... Args>
2046  HALIDE_ALWAYS_INLINE
2047  ptrdiff_t
2048  offset_of(int d, int first, Args... rest) const {
2049 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2050  assert(first >= this->buf.dim[d].min);
2051  assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2052 #endif
2053  return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2054  }
2055 
2056  HALIDE_ALWAYS_INLINE
2057  ptrdiff_t offset_of(int d) const {
2058  return 0;
2059  }
2060 
2061  template<typename... Args>
2062  HALIDE_ALWAYS_INLINE
2063  storage_T *
2064  address_of(Args... args) const {
2065  if (T_is_void) {
2066  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2067  } else {
2068  return (storage_T *)(this->buf.host) + offset_of(0, args...);
2069  }
2070  }
2071 
2072  HALIDE_ALWAYS_INLINE
2073  ptrdiff_t offset_of(const int *pos) const {
2074  ptrdiff_t offset = 0;
2075  for (int i = this->dimensions() - 1; i >= 0; i--) {
2076 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2077  assert(pos[i] >= this->buf.dim[i].min);
2078  assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2079 #endif
2080  offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2081  }
2082  return offset;
2083  }
2084 
2085  HALIDE_ALWAYS_INLINE
2086  storage_T *address_of(const int *pos) const {
2087  if (T_is_void) {
2088  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2089  } else {
2090  return (storage_T *)this->buf.host + offset_of(pos);
2091  }
2092  }
2093 
2094 public:
2095  /** Get a pointer to the address of the min coordinate. */
2096  T *data() const {
2097  return (T *)(this->buf.host);
2098  }
2099 
2100  /** Access elements. Use im(...) to get a reference to an element,
2101  * and use &im(...) to get the address of an element. If you pass
2102  * fewer arguments than the buffer has dimensions, the rest are
2103  * treated as their min coordinate. The non-const versions set the
2104  * host_dirty flag to true.
2105  */
2106  //@{
2107  template<typename... Args,
2108  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2109  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2110  static_assert(!T_is_void,
2111  "Cannot use operator() on Buffer<void> types");
2112  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2113  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2114  assert(!device_dirty());
2115  return *((const not_void_T *)(address_of(first, rest...)));
2116  }
2117 
2118  HALIDE_ALWAYS_INLINE
2119  const not_void_T &
2120  operator()() const {
2121  static_assert(!T_is_void,
2122  "Cannot use operator() on Buffer<void> types");
2123  constexpr int expected_dims = 0;
2124  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2125  assert(!device_dirty());
2126  return *((const not_void_T *)(data()));
2127  }
2128 
2129  HALIDE_ALWAYS_INLINE
2130  const not_void_T &
2131  operator()(const int *pos) const {
2132  static_assert(!T_is_void,
2133  "Cannot use operator() on Buffer<void> types");
2134  assert(!device_dirty());
2135  return *((const not_void_T *)(address_of(pos)));
2136  }
2137 
2138  template<typename... Args,
2139  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2140  HALIDE_ALWAYS_INLINE
2141  not_void_T &
2142  operator()(int first, Args... rest) {
2143  static_assert(!T_is_void,
2144  "Cannot use operator() on Buffer<void> types");
2145  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2146  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2147  set_host_dirty();
2148  return *((not_void_T *)(address_of(first, rest...)));
2149  }
2150 
2151  HALIDE_ALWAYS_INLINE
2152  not_void_T &
2153  operator()() {
2154  static_assert(!T_is_void,
2155  "Cannot use operator() on Buffer<void> types");
2156  constexpr int expected_dims = 0;
2157  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2158  set_host_dirty();
2159  return *((not_void_T *)(data()));
2160  }
2161 
2162  HALIDE_ALWAYS_INLINE
2163  not_void_T &
2164  operator()(const int *pos) {
2165  static_assert(!T_is_void,
2166  "Cannot use operator() on Buffer<void> types");
2167  set_host_dirty();
2168  return *((not_void_T *)(address_of(pos)));
2169  }
2170  // @}
2171 
2172  /** Tests that all values in this buffer are equal to val. */
2173  bool all_equal(not_void_T val) const {
2174  bool all_equal = true;
2175  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2176  return all_equal;
2177  }
2178 
2179  Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2180  set_host_dirty();
2181  for_each_value([=](T &v) { v = val; });
2182  return *this;
2183  }
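// Usage sketch (illustrative): fill() and all_equal() make test setup and
// verification one-liners.
//
//   Buffer<int> im(8, 8);
//   im.fill(42);
//   assert(im.all_equal(42));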
2184 
2185 private:
2186  /** Helper functions for for_each_value. */
2187  // @{
2188  template<int N>
2189  struct for_each_value_task_dim {
2190  std::ptrdiff_t extent;
2191  std::ptrdiff_t stride[N];
2192  };
2193 
2194  // Given an array of strides, and a bunch of pointers to pointers
2195  // (all of different types), advance the pointers using the
2196  // strides.
2197  template<typename Ptr, typename... Ptrs>
2198  HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2199  ptr += *stride;
2200  advance_ptrs(stride + 1, ptrs...);
2201  }
2202 
2203  HALIDE_ALWAYS_INLINE
2204  static void advance_ptrs(const std::ptrdiff_t *) {
2205  }
2206 
2207  template<typename Fn, typename Ptr, typename... Ptrs>
2208  HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2209  const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2210  if (d == 0) {
2211  if (innermost_strides_are_one) {
2212  Ptr end = ptr + t[0].extent;
2213  while (ptr != end) {
2214  f(*ptr++, (*ptrs++)...);
2215  }
2216  } else {
2217  for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2218  f(*ptr, (*ptrs)...);
2219  advance_ptrs(t[0].stride, ptr, ptrs...);
2220  }
2221  }
2222  } else {
2223  for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2224  for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2225  advance_ptrs(t[d].stride, ptr, ptrs...);
2226  }
2227  }
2228  }
2229 
2230  // Return pair is <new_dimensions, innermost_strides_are_one>
2231  template<int N>
2232  HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2233  const halide_buffer_t **buffers) {
2234  const int dimensions = buffers[0]->dimensions;
2235  assert(dimensions > 0);
2236 
2237  // Check the buffers all have clean host allocations
2238  for (int i = 0; i < N; i++) {
2239  if (buffers[i]->device) {
2240  assert(buffers[i]->host &&
2241  "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2242  assert(!buffers[i]->device_dirty() &&
2243  "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2244  } else {
2245  assert(buffers[i]->host &&
2246  "Buffer passed to for_each_value has no host or device allocation");
2247  }
2248  }
2249 
2250  // Extract the strides in all the dimensions
2251  for (int i = 0; i < dimensions; i++) {
2252  for (int j = 0; j < N; j++) {
2253  assert(buffers[j]->dimensions == dimensions);
2254  assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2255  buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2256  const int s = buffers[j]->dim[i].stride;
2257  t[i].stride[j] = s;
2258  }
2259  t[i].extent = buffers[0]->dim[i].extent;
2260 
2261  // Order the dimensions by stride, so that the traversal is cache-coherent.
2262  // Use the last dimension for this, because this is the source in copies.
2263  // It appears to be better to optimize read order than write order.
2264  for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2265  std::swap(t[j], t[j - 1]);
2266  }
2267  }
2268 
2269  // flatten dimensions where possible to make a larger inner
2270  // loop for autovectorization.
2271  int d = dimensions;
2272  for (int i = 1; i < d; i++) {
2273  bool flat = true;
2274  for (int j = 0; j < N; j++) {
2275  flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2276  }
2277  if (flat) {
2278  t[i - 1].extent *= t[i].extent;
2279  for (int j = i; j < d - 1; j++) {
2280  t[j] = t[j + 1];
2281  }
2282  i--;
2283  d--;
2284  }
2285  }
2286 
2287  // Note that we assert() that dimensions > 0 above
2288  // (our one-and-only caller will only call us that way)
2289  // so the unchecked access to t[0] should be safe.
2290  bool innermost_strides_are_one = true;
2291  for (int i = 0; i < N; i++) {
2292  innermost_strides_are_one &= (t[0].stride[i] == 1);
2293  }
2294 
2295  return {d, innermost_strides_are_one};
2296  }
2297 
2298  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2299  void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2300  if (dimensions() > 0) {
2301  const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2302  Buffer<>::for_each_value_task_dim<N> *t =
2303  (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2304  // Move the preparatory code into a non-templated helper to
2305  // save code size.
2306  const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2307  auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2308  if (new_dims > 0) {
2309  Buffer<>::for_each_value_helper(f, new_dims - 1,
2310  innermost_strides_are_one,
2311  t,
2312  data(), (other_buffers.data())...);
2313  return;
2314  }
2315  // else fall thru
2316  }
2317 
2318  // zero-dimensional case
2319  f(*data(), (*other_buffers.data())...);
2320  }
2321  // @}
2322 
2323 public:
2324  /** Call a function on every value in the buffer, and the
2325  * corresponding values in some number of other buffers of the
2326  * same size. The function should take a reference, const
2327  * reference, or value of the correct type for each buffer. This
2328  * effectively lifts a function of scalars to an element-wise
2329  * function of buffers. This produces code that the compiler can
2330  * autovectorize. This is slightly cheaper than for_each_element,
2331  * because it does not need to track the coordinates.
2332  *
2333  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2334  * 'this' or the other-buffers arguments) will allow mutation of the
2335  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2336  * a mutable reference for the lambda argument of a Buffer<const T>
2337  * will result in a compilation error. */
2338  // @{
2339  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2340  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2341  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2342  return *this;
2343  }
2344 
2345  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2346  HALIDE_ALWAYS_INLINE
2347  Buffer<T, Dims, InClassDimStorage> &
2348  for_each_value(Fn &&f, Args &&...other_buffers) {
2349  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2350  return *this;
2351  }
2352  // @}
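// Usage sketch (illustrative): element-wise a += b over two buffers of the same
// shape. The lambda receives one value from each buffer at every site.
//
//   Buffer<float> a(100, 100), b(100, 100);
//   a.for_each_value([](float &av, float bv) { av += bv; }, b);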
2353 
2354 private:
2355  // Helper functions for for_each_element
2356  struct for_each_element_task_dim {
2357  int min, max;
2358  };
2359 
2360  /** If f is callable with this many args, call it. The first
2361  * argument is just to make the overloads distinct. Actual
2362  * overload selection is done using the enable_if. */
2363  template<typename Fn,
2364  typename... Args,
2365  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2366  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2367  f(args...);
2368  }
2369 
2370  /** If the above overload is impossible, we add an outer loop over
2371  * an additional argument and try again. */
2372  template<typename Fn,
2373  typename... Args>
2374  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2375  for (int i = t[d].min; i <= t[d].max; i++) {
2376  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2377  }
2378  }
2379 
2380  /** Determine the minimum number of arguments a callable can take
2381  * using the same trick. */
2382  template<typename Fn,
2383  typename... Args,
2384  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2385  HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2386  return (int)(sizeof...(Args));
2387  }
2388 
2389  /** The recursive version is only enabled up to a recursion limit
2390  * of 256. This catches callables that aren't callable with any
2391  * number of ints. */
2392  template<typename Fn,
2393  typename... Args>
2394  HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2395  static_assert(sizeof...(args) <= 256,
2396  "Callable passed to for_each_element must accept either a const int *,"
2397  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2398  return num_args(0, std::forward<Fn>(f), 0, args...);
2399  }
2400 
2401  /** A version where the callable takes a position array instead,
2402  * with compile-time recursion on the dimensionality. This
2403  * overload is preferred to the one below using the same int vs
2404  * double trick as above, but is impossible once d hits -1 using
2405  * std::enable_if. */
2406  template<int d,
2407  typename Fn,
2408  typename = typename std::enable_if<(d >= 0)>::type>
2409  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2410  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2411  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2412  }
2413  }
2414 
2415  /** Base case for recursion above. */
2416  template<int d,
2417  typename Fn,
2418  typename = typename std::enable_if<(d < 0)>::type>
2419  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2420  f(pos);
2421  }
2422 
2423  /** A run-time-recursive version (instead of
2424  * compile-time-recursive) that requires the callable to take a
2425  * pointer to a position array instead. Dispatches to the
2426  * compile-time-recursive version once the dimensionality gets
2427  * small. */
2428  template<typename Fn>
2429  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2430  if (d == -1) {
2431  f(pos);
2432  } else if (d == 0) {
2433  // Once the dimensionality gets small enough, dispatch to
2434  // a compile-time-recursive version for better codegen of
2435  // the inner loops.
2436  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2437  } else if (d == 1) {
2438  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2439  } else if (d == 2) {
2440  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2441  } else if (d == 3) {
2442  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2443  } else {
2444  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2445  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2446  }
2447  }
2448  }
2449 
2450  /** We now have two overloads for for_each_element. This one
2451  * triggers if the callable takes a const int *.
2452  */
2453  template<typename Fn,
2454  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2455  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2456  const int size = dims * sizeof(int);
2457  int *pos = (int *)HALIDE_ALLOCA(size);
2458  // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2459  // Add this memset to silence it.
2460  memset(pos, 0, size);
2461  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2462  }
2463 
2464  /** This one triggers otherwise. It treats the callable as
2465  * something that takes some number of ints. */
2466  template<typename Fn>
2467  HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2468  int args = num_args(0, std::forward<Fn>(f));
2469  assert(dims >= args);
2470  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2471  }
2472 
2473  template<typename Fn>
2474  void for_each_element_impl(Fn &&f) const {
2475  for_each_element_task_dim *t =
2476  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2477  for (int i = 0; i < dimensions(); i++) {
2478  t[i].min = dim(i).min();
2479  t[i].max = dim(i).max();
2480  }
2481  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2482  }
2483 
2484 public:
2485  /** Call a function at each site in a buffer. This is likely to be
2486  * much slower than using Halide code to populate a buffer, but is
2487  * convenient for tests. If the function has more arguments than the
2488  * buffer has dimensions, the remaining arguments will be zero. If it
2489  * has fewer arguments than the buffer has dimensions then the last
2490  * few dimensions of the buffer are not iterated over. For example,
2491  * the following code exploits this to set a floating point RGB image
2492  * to red:
2493 
2494  \code
2495  Buffer<float, 3> im(100, 100, 3);
2496  im.for_each_element([&](int x, int y) {
2497  im(x, y, 0) = 1.0f;
2498  im(x, y, 1) = 0.0f;
2499  im(x, y, 2) = 0.0f;
2500  });
2501  \endcode
2502 
2503  * The compiled code is equivalent to writing a nested for loop,
2504  * and compilers are capable of optimizing it in the same way.
2505  *
2506  * If the callable can be called with an int * as the sole argument,
2507  * that version is called instead. Each location in the buffer is
2508  * passed to it in a coordinate array. This version is higher-overhead
2509  * than the variadic version, but is useful for writing generic code
2510  * that accepts buffers of arbitrary dimensionality. For example, the
2511  * following sets the value at all sites in an arbitrary-dimensional
2512  * buffer to their first coordinate:
2513 
2514  \code
2515  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2516  \endcode
2517 
2518  * It is also possible to use for_each_element to iterate over entire
2519  * rows or columns by cropping the buffer to a single column or row
2520  * respectively and iterating over elements of the result. For example,
2521  * to set the diagonal of the image to 1 by iterating over the columns:
2522 
2523  \code
2524  Buffer<float, 3> im(100, 100, 3);
2525  im.sliced(1, 0).for_each_element([&](int x, int c) {
2526  im(x, x, c) = 1.0f;
2527  });
2528  \endcode
2529 
2530  * Or, assuming the memory layout is known to be dense per row, one can
2531  * memset each row of an image like so:
2532 
2533  \code
2534  Buffer<float, 3> im(100, 100, 3);
2535  im.sliced(0, 0).for_each_element([&](int y, int c) {
2536  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2537  });
2538  \endcode
2539 
2540  */
2541  // @{
2542  template<typename Fn>
2543  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2544  for_each_element_impl(f);
2545  return *this;
2546  }
2547 
2548  template<typename Fn>
2549  HALIDE_ALWAYS_INLINE
2550  Buffer<T, Dims, InClassDimStorage> &
2551  for_each_element(Fn &&f) {
2552  for_each_element_impl(f);
2553  return *this;
2554  }
2555  // @}
2556 
2557 private:
2558  template<typename Fn>
2559  struct FillHelper {
2560  Fn f;
2561  Buffer<T, Dims, InClassDimStorage> *buf;
2562 
2563  template<typename... Args,
2564  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2565  void operator()(Args... args) {
2566  (*buf)(args...) = f(args...);
2567  }
2568 
2569  FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2570  : f(std::forward<Fn>(f)), buf(buf) {
2571  }
2572  };
2573 
2574 public:
2575  /** Fill a buffer by evaluating a callable at every site. The
2576  * callable should look much like a callable passed to
2577  * for_each_element, but it should return the value that should be
2578  * stored to the coordinate corresponding to the arguments. */
2579  template<typename Fn,
2580  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2581  Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2582  // We'll go via for_each_element. We need a variadic wrapper lambda.
2583  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2584  return for_each_element(wrapper);
2585  }
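// Usage sketch (illustrative): fill a buffer from a coordinate-based expression.
//
//   Buffer<float> ramp(100, 100);
//   ramp.fill([](int x, int y) { return (x + y) / 200.0f; });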
2586 
2587  /** Check if an input buffer passed to an extern stage is a
2588  * bounds query. Compared to doing the host pointer check directly,
2589  * this both adds clarity to code and will facilitate moving to
2590  * another representation for bounds query arguments. */
2591  bool is_bounds_query() const {
2592  return buf.is_bounds_query();
2593  }
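// Usage sketch (illustrative; my_extern_stage is a hypothetical extern stage):
// in a bounds query the host pointer is null, so set the bounds you need on the
// raw buffer and return without touching any data.
//
//   int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
//       Halide::Runtime::Buffer<const float> input(*in);
//       if (input.is_bounds_query()) {
//           for (int i = 0; i < in->dimensions; i++) {
//               in->dim[i] = out->dim[i];  // request the region being produced
//           }
//           return 0;
//       }
//       // ... otherwise read `in` and compute `out` ...
//       return 0;
//   }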
2594 
2595  /** Convenient check to verify that all of the interesting bytes in the Buffer
2596  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2597  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2598  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2599  * the entire Buffer storage.) */
2600  void msan_check_mem_is_initialized(bool entire = false) const {
2601 #if defined(__has_feature)
2602 #if __has_feature(memory_sanitizer)
2603  if (entire) {
2604  __msan_check_mem_is_initialized(data(), size_in_bytes());
2605  } else {
2606  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2607  }
2608 #endif
2609 #endif
2610  }
2611 };
2612 
2613 } // namespace Runtime
2614 } // namespace Halide
2615 
2616 #undef HALIDE_ALLOCA
2617 
2618 #endif // HALIDE_RUNTIME_BUFFER_H
#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
Definition: HalideBuffer.h:53
#define HALIDE_ALLOCA
Definition: HalideBuffer.h:37
This file declares the routines used by Halide internally in its runtime.
#define HALIDE_NEVER_INLINE
Definition: HalideRuntime.h:50
@ halide_error_code_success
There was no error.
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:49
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Definition: Buffer.h:122
Read-only access to the shape.
Definition: HalideBuffer.h:574
HALIDE_ALWAYS_INLINE int min() const
The lowest coordinate in this dimension.
Definition: HalideBuffer.h:579
Dimension(const halide_dimension_t &dim)
Definition: HalideBuffer.h:625
HALIDE_ALWAYS_INLINE int max() const
The highest coordinate in this dimension.
Definition: HalideBuffer.h:595
HALIDE_ALWAYS_INLINE iterator end() const
An iterator that points to one past the max coordinate.
Definition: HalideBuffer.h:621
HALIDE_ALWAYS_INLINE int stride() const
The number of elements in memory you have to step over to increment this coordinate by one.
Definition: HalideBuffer.h:585
HALIDE_ALWAYS_INLINE iterator begin() const
An iterator that points to the min coordinate.
Definition: HalideBuffer.h:616
HALIDE_ALWAYS_INLINE int extent() const
The extent of the image along this dimension.
Definition: HalideBuffer.h:590
A templated Buffer class that wraps halide_buffer_t and adds functionality.
Definition: HalideBuffer.h:222
Buffer< T,(Dims==AnyDims ? AnyDims :Dims+1)> embedded(int d, int pos=0) const
Make a new buffer that views this buffer as a single slice in a higher-dimensional space.
void translate(int d, int delta)
Translate an image in-place along one dimension by changing how it is indexed.
Buffer(const halide_buffer_t &buf, BufferDeviceOwnership ownership=BufferDeviceOwnership::Unmanaged)
Make a Buffer from a halide_buffer_t.
Definition: HalideBuffer.h:704
void allocate(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Allocate memory for this Buffer.
Definition: HalideBuffer.h:895
void add_dimension()
Add a new dimension with a min of zero and an extent of one.
T * end() const
A pointer to one beyond the element with the highest address.
Definition: HalideBuffer.h:677
void slice(int d)
Slice a buffer in-place at the dimension's minimum.
static void set_default_allocate_fn(void *(*allocate_fn)(size_t))
Definition: HalideBuffer.h:728
bool owns_host_memory() const
Does this Buffer own the host memory it refers to?
Definition: HalideBuffer.h:270
HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest)
int width() const
Conventional names for the first three dimensions.
Buffer< T, Dims, InClassDimStorage > translated(int d, int dx) const
Make an image which refers to the same data with using translated coordinates in the given dimension.
HALIDE_ALWAYS_INLINE const Buffer< T2, D2, InClassDimStorage > & as() const &
Return a const typed reference to this Buffer.
void transpose(const std::vector< int > &order)
A generalized transpose: instead of swapping two dimensions, pass a vector that lists each dimension ...
void set_min(const std::vector< int > &mins)
Set the min coordinate of an image in the first N dimensions.
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d, int pos) const
Make a lower-dimensional buffer that refers to one slice of this buffer.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< int > &sizes)
Initialize an Buffer of runtime type from a pointer and a vector of sizes.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
int copy_to_host(void *ctx=nullptr)
Buffer(halide_type_t t, const std::vector< int > &sizes)
Allocate a new image of unknown type using a vector of ints as the size.
int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
int device_free(void *ctx=nullptr)
int extent(int i) const
Definition: HalideBuffer.h:641
bool contains(Args... args) const
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers)
void set_device_dirty(bool v=true)
Buffer(T *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Buffer(Buffer< T2, D2, S2 > &&other)
Move-construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:804
void slice(int d, int pos)
Rewrite the buffer to refer to a single lower-dimensional slice of itself along the given dimension a...
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v=true)
Methods for managing any GPU allocation.
void msan_check_mem_is_initialized(bool entire=false) const
Convenient check to verify that all of the interesting bytes in the Buffer are initialized under MSAN...
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > as() &&
Return an rval reference to this Buffer.
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T2, D2, S2 > &other)
Assign from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:818
static Buffer< T, Dims, InClassDimStorage > make_scalar()
Make a zero-dimensional Buffer.
int device_detach_native(void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > translated(const std::vector< int > &delta) const
Make an image which refers to the same data translated along the first N dimensions.
int device_wrap_native(const struct halide_device_interface_t *device_interface, uint64_t handle, void *ctx=nullptr)
HALIDE_ALWAYS_INLINE Dimension dim(int i) const
Access the shape of the buffer.
Definition: HalideBuffer.h:631
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f)
Buffer< T, Dims, InClassDimStorage > cropped(int d, int min, int extent) const
Make an image that refers to a sub-range of this image along the given dimension.
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > as_const() &&
Buffer(int first, int second, Args... rest)
HALIDE_ALWAYS_INLINE const Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() const &
BufferDeviceOwnership device_ownership() const
Return the method by which the device field is managed.
void check_overflow()
Check the product of the extents fits in memory.
Definition: HalideBuffer.h:880
static bool can_convert_from(const Buffer< T2, D2, S2 > &other)
Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
Definition: HalideBuffer.h:739
int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
int device_sync(void *ctx=nullptr)
HALIDE_ALWAYS_INLINE const not_void_T & operator()() const
Buffer(const std::vector< int > &sizes)
Allocate a new image of known type using a vector of ints as the size.
void embed(int d, int pos=0)
Embed a buffer in-place, increasing the dimensionality.
static constexpr halide_type_t static_halide_type()
Get the Halide type of T.
Definition: HalideBuffer.h:265
Buffer(T *data, int first, Args &&...rest)
Initialize an Buffer from a pointer and some sizes.
HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos)
int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > cropped(const std::vector< std::pair< int, int >> &rect) const
Make an image that refers to a sub-rectangle of this image along the first N dimensions.
Buffer(Array(&vals)[N])
Make an Buffer that refers to a statically sized array.
static Buffer< void, Dims, InClassDimStorage > make_interleaved(halide_type_t t, int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_interleaved(halide_type_t t, T *data, int width, int height, int channels)
Wrap an existing interleaved image.
halide_type_t type() const
Get the type of the elements.
Definition: HalideBuffer.h:665
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > & fill(Fn &&f)
Fill a buffer by evaluating a callable at every site.
Buffer(int first)
Allocate a new image of the given size.
Definition: HalideBuffer.h:997
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_interleaved(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in interleaved memory layout (vs.
HALIDE_ALWAYS_INLINE bool device_dirty() const
static Buffer< T, Dims, InClassDimStorage > make_scalar(T *data)
Make a zero-dimensional Buffer that points to non-owned, existing data.
static constexpr int static_dimensions()
Callers should not use the result if has_static_dimensions is false.
Definition: HalideBuffer.h:278
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_planar(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in planar memory layout (vs.
void transpose(int d1, int d2)
Transpose a buffer in-place by changing how it is indexed.
HALIDE_ALWAYS_INLINE const not_void_T & operator()(const int *pos) const
void deallocate()
Drop reference to any owned host or device memory, possibly freeing it, if this buffer held the last ...
Definition: HalideBuffer.h:959
size_t size_in_bytes() const
The total number of bytes spanned by the data in memory.
Definition: HalideBuffer.h:683
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers) const
Call a function on every value in the buffer, and the corresponding values in some number of other bu...
bool has_device_allocation() const
halide_buffer_t * raw_buffer()
Get a pointer to the raw halide_buffer_t this wraps.
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Standard move-assignment operator.
Definition: HalideBuffer.h:867
Buffer< T, Dims, InClassDimStorage > transposed(const std::vector< int > &order) const
Make a buffer which refers to the same data in the same layout using a different ordering of the dime...
static Buffer< T, Dims, InClassDimStorage > make_with_shape_of(Buffer< T2, D2, S2 > src, void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Make a buffer with the same shape and memory nesting order as another buffer.
void reset()
Reset the Buffer to be equivalent to a default-constructed Buffer of the same static type (if any); B...
Definition: HalideBuffer.h:690
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
HALIDE_ALWAYS_INLINE const not_void_T & operator()(int first, Args... rest) const
Access elements.
Buffer(halide_type_t t, int first, Args... rest)
Allocate a new image of the given size with a runtime type.
Definition: HalideBuffer.h:976
int dimensions() const
Get the dimensionality of the buffer.
Definition: HalideBuffer.h:656
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T, Dims, InClassDimStorage > &other)
Standard assignment operator.
Definition: HalideBuffer.h:834
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
int min(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:638
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T2, D2, S2 > &&other)
Move from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:853
void device_deallocate()
Drop reference to any owned device memory, possibly freeing it if this buffer held the last reference...
Definition: HalideBuffer.h:966
void add_dimension_with_stride(int s)
Add a new dimension with a min of zero, an extent of one, and the specified stride.
Buffer< T, Dims, InClassDimStorage > & fill(not_void_T val)
Buffer(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Move constructor.
Definition: HalideBuffer.h:791
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_scalar(halide_type_t t)
Make a zero-dimensional Buffer.
HALIDE_ALWAYS_INLINE not_void_T & operator()()
void crop(const std::vector< std::pair< int, int >> &rect)
Crop an image in-place along the first N dimensions.
void crop(int d, int min, int extent)
Crop an image in-place along the given dimension.
void set_min(Args... args)
Buffer< T, Dims, InClassDimStorage > transposed(int d1, int d2) const
Make a buffer which refers to the same data in the same layout using a swapped indexing order for the...
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d) const
Make a lower-dimensional buffer that refers to one slice of this buffer at the dimension's minimum.
size_t number_of_elements() const
The total number of elements this buffer represents.
Definition: HalideBuffer.h:651
static void assert_can_convert_from(const Buffer< T2, D2, S2 > &other)
Fail an assertion at runtime or compile-time if an Buffer<T, Dims, InClassDimStorage> cannot be const...
Definition: HalideBuffer.h:757
void translate(const std::vector< int > &delta)
Translate an image along the first N dimensions by changing how it is indexed.
Buffer(const Buffer< T, Dims, InClassDimStorage > &other)
Copy constructor.
Definition: HalideBuffer.h:766
Buffer(const std::vector< int > &sizes, const std::vector< int > &storage_order)
Buffer(halide_type_t t, const std::vector< int > &sizes, const std::vector< int > &storage_order)
Allocate a new image of unknown type using a vector of ints as the size and a vector of indices indic...
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
T * data() const
Get a pointer to the address of the min coordinate.
bool is_bounds_query() const
Check if an input buffer passed extern stage is a querying bounds.
int left() const
Conventional names for the min and max value of each dimension.
void copy_from(Buffer< T2, D2, S2 > src)
Fill a Buffer with the values at the same coordinates in another Buffer.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(T *data, int width, int height, int channels)
Wrap an existing interleaved image.
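A sketch of wrapping packed RGB data owned by other code (the vector stands in for externally provided pixels):

    #include <vector>
    #include "HalideBuffer.h"

    void wrap_interleaved() {
        std::vector<uint8_t> pixels(640 * 480 * 3);  // RGBRGB... packed rows
        auto im = Halide::Runtime::Buffer<uint8_t>::make_interleaved(
            pixels.data(), 640, 480, 3);
        // im(x, y, c): stride 3 in x, 640*3 in y, 1 in c; no data is copied.
    }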
Buffer< not_const_T, Dims, InClassDimStorage > copy(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Make a new image which is a deep copy of this image.
Buffer< T, Dims, InClassDimStorage > alias() const
Make a copy of the Buffer which shares the underlying host and/or device allocations with the existing Buffer.
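The difference between copy and alias is ownership; a sketch contrasting a deep copy with a shared view (names illustrative):

    #include "HalideBuffer.h"

    void copy_vs_alias() {
        Halide::Runtime::Buffer<float> im(32, 32);
        auto deep = im.copy();   // new dense allocation; values are copied
        auto view = im.alias();  // shares im's host/device allocations
    }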
int stride(int i) const
Definition: HalideBuffer.h:644
static void set_default_deallocate_fn(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:731
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f) const
Call a function at each site in a buffer.
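The callable receives one int per dimension; a sketch (names and sizes illustrative):

    #include "HalideBuffer.h"

    void for_each_element_example() {
        Halide::Runtime::Buffer<float> im(320, 240);
        im.for_each_element([&](int x, int y) {
            im(x, y) = float(x + y);  // visited once per coordinate
        });
    }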
Buffer(const Buffer< T2, D2, S2 > &other)
Construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:781
bool contains(const std::vector< int > &coords) const
Test if a given coordinate is within the bounds of an image.
T * begin() const
A pointer to the element with the lowest address.
Definition: HalideBuffer.h:671
Buffer(T *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Buffer(T *data, const std::vector< int > &sizes)
Initialize a Buffer from a pointer and a vector of sizes.
bool all_equal(not_void_T val) const
Tests that all values in this buffer are equal to val.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int first, Args &&...rest)
Initialize a Buffer of runtime type from a pointer and some sizes.
const halide_buffer_t * raw_buffer() const
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > & as() &
Return a typed reference to this Buffer.
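A sketch of recovering a typed reference from a runtime-typed buffer; the conversion asserts that the stored halide_type_t matches (names and sizes illustrative):

    #include "HalideBuffer.h"

    void typed_view() {
        Halide::Runtime::Buffer<void> vb(halide_type_t(halide_type_float, 32), 16, 16);
        Halide::Runtime::Buffer<float> &fb = vb.as<float>();
        fb(0, 0) = 1.0f;
    }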
HALIDE_ALWAYS_INLINE auto slice(Vec vec, Base base, Stride stride, Lanes lanes) noexcept -> SliceOp< decltype(pattern_arg(vec)), decltype(pattern_arg(base)), decltype(pattern_arg(stride)), decltype(pattern_arg(lanes))>
Definition: IRMatch.h:2191
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b)
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b)
auto end(reverse_adaptor< T > i)
Definition: Util.h:472
bool any_zero(const Container &c)
Definition: HalideBuffer.h:149
constexpr int AnyDims
Definition: HalideBuffer.h:193
BufferDeviceOwnership
This indicates how the device allocation for a Halide::Runtime::Buffer should be deallocated.
Definition: HalideBuffer.h:177
@ AllocatedDeviceAndHost
Call device_and_host_free when DevRefCount goes to zero.
@ WrappedNative
halide_device_detach_native will be called when device ref count goes to zero
@ Unmanaged
No free routine will be called when device ref count goes to zero
@ Cropped
Call halide_device_release_crop when DevRefCount goes to zero.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:597
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:600
unsigned __INT64_TYPE__ uint64_t
void * malloc(size_t)
__UINTPTR_TYPE__ uintptr_t
ALWAYS_INLINE T align_up(T p, size_t alignment)
unsigned __INT8_TYPE__ uint8_t
void * memset(void *s, int val, size_t n)
__PTRDIFF_TYPE__ ptrdiff_t
unsigned __INT16_TYPE__ uint16_t
__SIZE_TYPE__ size_t
unsigned __INT32_TYPE__ uint32_t
void * memcpy(void *s1, const void *s2, size_t n)
void free(void *)
A struct acting as a header for allocations owned by the Buffer class itself.
Definition: HalideBuffer.h:166
AllocationHeader(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:171
An iterator class, so that you can iterate over coordinates in a dimension using a range-based for loop.
Definition: HalideBuffer.h:601
bool operator!=(const iterator &other) const
Definition: HalideBuffer.h:606
A similar struct for managing device allocations.
Definition: HalideBuffer.h:186
BufferDeviceOwnership ownership
Definition: HalideBuffer.h:190
The raw representation of an image passed around by generated Halide code.
int32_t dimensions
The dimensionality of the buffer.
halide_dimension_t * dim
The shape of the buffer.
uint64_t device
A device-handle for e.g. GPU memory used to back this buffer.
uint8_t * host
A pointer to the start of the data in main memory.
struct halide_type_t type
The type of each buffer element.
const struct halide_device_interface_t * device_interface
The interface used to interpret the above handle.
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device allocations.
int(* device_slice)(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
int(* device_release_crop)(void *user_context, struct halide_buffer_t *buf)
int(* device_crop)(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
int(* copy_to_host)(void *user_context, struct halide_buffer_t *buf)
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* device_free)(void *user_context, struct halide_buffer_t *buf)
int(* detach_native)(void *user_context, struct halide_buffer_t *buf)
int(* device_and_host_free)(void *user_context, struct halide_buffer_t *buf)
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
A runtime tag for a type in the halide type system.