Halide  20.0.0
Halide compiler and libraries
IROperator.h
Go to the documentation of this file.
1 #ifndef HALIDE_IR_OPERATOR_H
2 #define HALIDE_IR_OPERATOR_H
3 
4 /** \file
5  *
6  * Defines various operator overloads and utility functions that make
7  * it more pleasant to work with Halide expressions.
8  */
9 
10 #include <cmath>
11 #include <map>
12 #include <optional>
13 
14 #include "Expr.h"
15 #include "Target.h"
16 #include "Tuple.h"
17 
18 namespace Halide {
19 
20 namespace Internal {
21 /** Is the expression either an IntImm, a FloatImm, a StringImm, or a
22  * Cast of the same, or a Ramp or Broadcast of the same. Doesn't do
23  * any constant folding. */
24 bool is_const(const Expr &e);
25 
26 /** Is the expression an IntImm, FloatImm of a particular value, or a
27  * Cast, or Broadcast of the same. */
28 bool is_const(const Expr &e, int64_t v);
29 
30 /** If an expression is an IntImm or a Broadcast of an IntImm, return
31  * its value. Otherwise returns std::nullopt. */
32 std::optional<int64_t> as_const_int(const Expr &e);
33 
34 /** If an expression is a UIntImm or a Broadcast of a UIntImm, return
35  * its value. Otherwise returns std::nullopt. */
36 std::optional<uint64_t> as_const_uint(const Expr &e);
37 
38 /** If an expression is a FloatImm or a Broadcast of a FloatImm,
39  * return its value. Otherwise returns std::nullopt. */
40 std::optional<double> as_const_float(const Expr &e);
41 
42 /** Is the expression a constant integer power of two. Returns log base two of
43  * the expression if it is, or std::nullopt if not. Also returns std::nullopt
44  * for non-integer types. */
45 // @{
46 std::optional<int> is_const_power_of_two_integer(const Expr &e);
49 // @}
50 
51 /** Is the expression a const (as defined by is_const), and also
52  * strictly greater than zero (in all lanes, if a vector expression) */
53 bool is_positive_const(const Expr &e);
54 
55 /** Is the expression a const (as defined by is_const), and also
56  * strictly less than zero (in all lanes, if a vector expression) */
57 bool is_negative_const(const Expr &e);
58 
59 /** Is the expression an undef */
60 bool is_undef(const Expr &e);
61 
62 /** Is the expression a const (as defined by is_const), and also equal
63  * to zero (in all lanes, if a vector expression) */
64 bool is_const_zero(const Expr &e);
65 
66 /** Is the expression a const (as defined by is_const), and also equal
67  * to one (in all lanes, if a vector expression) */
68 bool is_const_one(const Expr &e);
69 
70 /** Is the statement a no-op (which we represent as either an
71  * undefined Stmt, or as an Evaluate node of a constant) */
72 bool is_no_op(const Stmt &s);
73 
74 /** Does the expression
75  * 1) Take on the same value no matter where it appears in a Stmt, and
76  * 2) Evaluating it has no side-effects
77  */
78 bool is_pure(const Expr &e);
79 
80 /** Construct an immediate of the given type from any numeric C++ type. */
81 // @{
84 Expr make_const(Type t, double val);
85 inline Expr make_const(Type t, int32_t val) {
86  return make_const(t, (int64_t)val);
87 }
88 inline Expr make_const(Type t, uint32_t val) {
89  return make_const(t, (uint64_t)val);
90 }
91 inline Expr make_const(Type t, int16_t val) {
92  return make_const(t, (int64_t)val);
93 }
94 inline Expr make_const(Type t, uint16_t val) {
95  return make_const(t, (uint64_t)val);
96 }
97 inline Expr make_const(Type t, int8_t val) {
98  return make_const(t, (int64_t)val);
99 }
100 inline Expr make_const(Type t, uint8_t val) {
101  return make_const(t, (uint64_t)val);
102 }
103 inline Expr make_const(Type t, bool val) {
104  return make_const(t, (uint64_t)val);
105 }
106 inline Expr make_const(Type t, float val) {
107  return make_const(t, (double)val);
108 }
109 inline Expr make_const(Type t, float16_t val) {
110  return make_const(t, (double)val);
111 }
112 // @}
113 
114 /** Construct a unique signed_integer_overflow Expr */
116 
117 /** Check if an expression is a signed_integer_overflow */
119 
120 /** Check if a constant value can be correctly represented as the given type. */
122 
123 /** Construct a boolean constant from a C++ boolean value.
124  * May also be a vector if width is given.
125  * It is not possible to coerce a C++ boolean to Expr because
126  * if we provide such a path then char objects can ambiguously
127  * be converted to Halide Expr or to std::string. The problem
128  * is that C++ does not have a real bool type - it is in fact
129  * close enough to char that C++ does not know how to distinguish them.
130  * make_bool is the explicit coercion. */
131 Expr make_bool(bool val, int lanes = 1);
132 
133 /** Construct the representation of zero in the given type */
135 
136 /** Construct the representation of one in the given type */
138 
139 /** Construct the representation of two in the given type */
141 
142 /** Construct the constant boolean true. May also be a vector of
143  * trues, if a lanes argument is given. */
144 Expr const_true(int lanes = 1);
145 
146 /** Construct the constant boolean false. May also be a vector of
147  * falses, if a lanes argument is given. */
148 Expr const_false(int lanes = 1);
149 
150 /** Attempt to cast an expression to a smaller type while provably not losing
151  * information. If it can't be done, return an undefined Expr.
152  *
153  * Optionally accepts a map that gives the constant bounds of exprs already
154  * analyzed to avoid redoing work across many calls to lossless_cast. It is not
155  * safe to use this optional map in contexts where the same Expr object may
156  * take on a different value. For example:
157  * (let x = 4 in some_expr_object) + (let x = 5 in the_same_expr_object)).
158  * It is safe to use it after uniquify_variable_names has been run. */
159 Expr lossless_cast(Type t, Expr e, std::map<Expr, ConstantInterval, ExprCompare> *cache = nullptr);
160 
161 /** Attempt to negate x without introducing new IR and without overflow.
162  * If it can't be done, return an undefined Expr. */
164 
165 /** Coerce the two expressions to have the same type, using C-style
166  * casting rules. For the purposes of casting, a boolean type is
167  * UInt(1). We use the following procedure:
168  *
169  * If the types already match, do nothing.
170  *
171  * Then, if one type is a vector and the other is a scalar, the scalar
172  * is broadcast to match the vector width, and we continue.
173  *
174  * Then, if one type is floating-point and the other is not, the
175  * non-float is cast to the floating-point type, and we're done.
176  *
177  * Then, if both types are unsigned ints, the one with fewer bits is
178  * cast to match the one with more bits and we're done.
179  *
180  * Then, if both types are signed ints, the one with fewer bits is
181  * cast to match the one with more bits and we're done.
182  *
183  * Finally, if one type is an unsigned int and the other type is a signed
184  * int, both are cast to a signed int with the greater of the two
185  * bit-widths. For example, matching an Int(8) with a UInt(16) results
186  * in an Int(16).
187  *
188  */
189 void match_types(Expr &a, Expr &b);
190 
191 /** Asserts that both expressions are integer types and are either
192  * both signed or both unsigned. If one argument is scalar and the
193  * other a vector, the scalar is broadcasted to have the same number
194  * of lanes as the vector. If one expression is of narrower type than
195  * the other, it is widened to the bit width of the wider. */
196 void match_types_bitwise(Expr &a, Expr &b, const char *op_name);
197 
198 /** Halide's vectorizable transcendentals. */
199 // @{
200 Expr halide_log(const Expr &a);
201 Expr halide_exp(const Expr &a);
202 Expr halide_erf(const Expr &a);
203 // @}
204 
205 /** Raise an expression to an integer power by repeatedly multiplying
206  * it by itself. */
208 
209 /** Split a boolean condition into vector of ANDs. If 'cond' is undefined,
210  * return an empty vector. */
211 void split_into_ands(const Expr &cond, std::vector<Expr> &result);
212 
213 /** A builder to help create Exprs representing halide_buffer_t
214  * structs (e.g. foo.buffer) via calls to halide_buffer_init. Fill out
215  * the fields and then call build. The resulting Expr will be a call
216  * to halide_buffer_init with the struct members as arguments. If the
217  * buffer_memory field is undefined, it uses a call to alloca to make
218  * some stack memory for the buffer. If the shape_memory field is
219  * undefined, it similarly uses stack memory for the shape. If the
220  * shape_memory field is null, it uses the dim field already in the
221  * buffer. Other uninitialized fields will take on a value of zero in
222  * the constructed buffer. */
227  int dimensions = 0;
228  std::vector<Expr> mins, extents, strides;
230  Expr build() const;
231 };
232 
233 /** If e is a ramp expression with stride, default 1, return the base,
234  * otherwise undefined. */
235 Expr strided_ramp_base(const Expr &e, int stride = 1);
236 
237 /** Implementations of division and mod that are specific to Halide.
238  * Use these implementations; do not use native C division or mod to
239  * simplify Halide expressions. Halide division and modulo satisfy
240  * the Euclidean definition of division for integers a and b:
241  *
242  \code
243  when b != 0, (a/b)*b + a%b = a
244  0 <= a%b < |b|
245  \endcode
246  *
247  * Additionally, mod by zero returns zero, and div by zero returns
248  * zero. This makes mod and div total functions.
249  */
250 // @{
251 template<typename T>
252 inline T mod_imp(T a, T b) {
253  Type t = type_of<T>();
254  if (!t.is_float() && b == 0) {
255  return 0;
256  } else if (t.is_int()) {
257  int64_t ia = a;
258  int64_t ib = b;
259  int64_t a_neg = ia >> 63;
260  int64_t b_neg = ib >> 63;
261  int64_t b_zero = (ib == 0) ? -1 : 0;
262  ia -= a_neg;
263  int64_t r = ia % (ib | b_zero);
264  r += (a_neg & ((ib ^ b_neg) + ~b_neg));
265  r &= ~b_zero;
266  return r;
267  } else {
268  return a % b;
269  }
270 }
271 
272 template<typename T>
273 inline T div_imp(T a, T b) {
274  Type t = type_of<T>();
275  if (!t.is_float() && b == 0) {
276  return (T)0;
277  } else if (t.is_int()) {
278  // Do it as 64-bit
279  int64_t ia = a;
280  int64_t ib = b;
281  int64_t a_neg = ia >> 63;
282  int64_t b_neg = ib >> 63;
283  int64_t b_zero = (ib == 0) ? -1 : 0;
284  ib -= b_zero;
285  ia -= a_neg;
286  int64_t q = ia / ib;
287  q += a_neg & (~b_neg - b_neg);
288  q &= ~b_zero;
289  return (T)q;
290  } else {
291  return a / b;
292  }
293 }
294 // @}
295 
296 // Special cases for float, double.
297 template<>
298 inline float mod_imp<float>(float a, float b) {
299  float f = a - b * (floorf(a / b));
300  // The remainder has the same sign as b.
301  return f;
302 }
303 template<>
304 inline double mod_imp<double>(double a, double b) {
305  double f = a - b * (std::floor(a / b));
306  return f;
307 }
308 
309 template<>
310 inline float div_imp<float>(float a, float b) {
311  return a / b;
312 }
313 template<>
314 inline double div_imp<double>(double a, double b) {
315  return a / b;
316 }
317 
318 /** Return an Expr that is identical to the input Expr, but with
319  * all calls to likely() and likely_if_innermost() removed. */
321 
322 /** Return a Stmt that is identical to the input Stmt, but with
323  * all calls to likely() and likely_if_innermost() removed. */
325 
326 /** Return an Expr that is identical to the input Expr, but with
327  * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
329 
330 /** Return a Stmt that is identical to the input Stmt, but with
331  * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
333 
334 /** If the expression is a tag helper call, remove it and return
335  * the tagged expression. If not, returns the expression. */
337 
338 template<typename T>
340  static constexpr bool value = std::is_convertible<T, const char *>::value ||
341  std::is_convertible<T, Halide::Expr>::value;
342 };
343 
344 template<typename... Args>
345 struct all_are_printable_args : meta_and<is_printable_arg<Args>...> {};
346 
347 // Secondary args to print can be Exprs or const char *
348 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args) {
349 }
350 
351 template<typename... Args>
352 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, const char *arg, Args &&...more_args) {
353  args.emplace_back(std::string(arg));
354  collect_print_args(args, std::forward<Args>(more_args)...);
355 }
356 
357 template<typename... Args>
358 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, Expr arg, Args &&...more_args) {
359  args.push_back(std::move(arg));
360  collect_print_args(args, std::forward<Args>(more_args)...);
361 }
362 
363 Expr requirement_failed_error(Expr condition, const std::vector<Expr> &args);
364 
365 Expr memoize_tag_helper(Expr result, const std::vector<Expr> &cache_key_values);
366 
367 /** Reset the counters used for random-number seeds in random_float/int/uint.
368  * (Note that the counters are incremented for each call, even if a seed is passed in.)
369  * This is used for multitarget compilation to ensure that each subtarget gets
370  * the same sequence of random numbers. */
372 
373 } // namespace Internal
374 
375 /** Cast an expression to the halide type corresponding to the C++ type T. */
376 template<typename T>
377 inline Expr cast(Expr a) {
378  return cast(type_of<T>(), std::move(a));
379 }
380 
381 /** Cast an expression to a new type. */
383 
384 /** Return the sum of two expressions, doing any necessary type
385  * coercion using \ref Internal::match_types */
387 
388 /** Add an expression and a constant integer. Coerces the type of the
389  * integer to match the type of the expression. Errors if the integer
390  * cannot be represented in the type of the expression. */
391 // @{
392 Expr operator+(Expr a, int b);
393 
394 /** Add a constant integer and an expression. Coerces the type of the
395  * integer to match the type of the expression. Errors if the integer
396  * cannot be represented in the type of the expression. */
397 Expr operator+(int a, Expr b);
398 
399 /** Modify the first expression to be the sum of two expressions,
400  * without changing its type. This casts the second argument to match
401  * the type of the first. */
403 
404 /** Return the difference of two expressions, doing any necessary type
405  * coercion using \ref Internal::match_types */
407 
408 /** Subtracts a constant integer from an expression. Coerces the type of the
409  * integer to match the type of the expression. Errors if the integer
410  * cannot be represented in the type of the expression. */
411 Expr operator-(Expr a, int b);
412 
413 /** Subtracts an expression from a constant integer. Coerces the type
414  * of the integer to match the type of the expression. Errors if the
415  * integer cannot be represented in the type of the expression. */
416 Expr operator-(int a, Expr b);
417 
418 /** Return the negative of the argument. Does no type casting, so more
419  * formally: return that number which when added to the original,
420  * yields zero of the same type. For unsigned integers the negative is
421  * still an unsigned integer. E.g. in UInt(8), the negative of 56 is
422  * 200, because 56 + 200 == 0 */
424 
425 /** Modify the first expression to be the difference of two expressions,
426  * without changing its type. This casts the second argument to match
427  * the type of the first. */
429 
430 /** Return the product of two expressions, doing any necessary type
431  * coercion using \ref Internal::match_types */
433 
434 /** Multiply an expression and a constant integer. Coerces the type of the
435  * integer to match the type of the expression. Errors if the integer
436  * cannot be represented in the type of the expression. */
437 Expr operator*(Expr a, int b);
438 
439 /** Multiply a constant integer and an expression. Coerces the type of
440  * the integer to match the type of the expression. Errors if the
441  * integer cannot be represented in the type of the expression. */
442 Expr operator*(int a, Expr b);
443 
444 /** Modify the first expression to be the product of two expressions,
445  * without changing its type. This casts the second argument to match
446  * the type of the first. */
448 
449 /** Return the ratio of two expressions, doing any necessary type
450  * coercion using \ref Internal::match_types. Note that integer
451  * division in Halide is not the same as integer division in C-like
452  * languages in two ways.
453  *
454  * First, signed integer division in Halide rounds according to the
455  * sign of the denominator. This means towards minus infinity for
456  * positive denominators, and towards positive infinity for negative
457  * denominators. This is unlike C, which rounds towards zero. This
458  * decision ensures that upsampling expressions like f(x/2, y/2) don't
459  * have funny discontinuities when x and y cross zero.
460  *
461  * Second, division by zero returns zero instead of faulting. For
462  * types where overflow is defined behavior, division of the largest
463  * negative signed integer by -1 returns the largest negative signed
464  * integer for the type (i.e. it wraps). This ensures that a division
465  * operation can never have a side-effect, which is helpful in Halide
466  * because scheduling directives can expand the domain of computation
467  * of a Func, potentially introducing new zero-division.
468  */
470 
471 /** Modify the first expression to be the ratio of two expressions,
472  * without changing its type. This casts the second argument to match
473  * the type of the first. Note that signed integer division in Halide
474  * rounds towards minus infinity, unlike C, which rounds towards
475  * zero. */
477 
478 /** Divides an expression by a constant integer. Coerces the type
479  * of the integer to match the type of the expression. Errors if the
480  * integer cannot be represented in the type of the expression. */
481 Expr operator/(Expr a, int b);
482 
483 /** Divides a constant integer by an expression. Coerces the type
484  * of the integer to match the type of the expression. Errors if the
485  * integer cannot be represented in the type of the expression. */
486 Expr operator/(int a, Expr b);
487 
488 /** Return the first argument reduced modulo the second, doing any
489  * necessary type coercion using \ref Internal::match_types. There are
490  * two key differences between C-like languages and Halide for the
491  * modulo operation, which complement the way division works.
492  *
493  * First, the result is never negative, so x % 2 is always zero or
494  * one, unlike in C-like languages. x % -2 is equivalent, and is also
495  * always zero or one. Second, mod by zero evaluates to zero (unlike
496  * in C, where it faults). This makes modulo, like division, a
497  * side-effect-free operation. */
499 
500 /** Mods an expression by a constant integer. Coerces the type
501  * of the integer to match the type of the expression. Errors if the
502  * integer cannot be represented in the type of the expression. */
503 Expr operator%(Expr a, int b);
504 
505 /** Mods a constant integer by an expression. Coerces the type
506  * of the integer to match the type of the expression. Errors if the
507  * integer cannot be represented in the type of the expression. */
508 Expr operator%(int a, Expr b);
509 
510 /** Return a boolean expression that tests whether the first argument
511  * is greater than the second, after doing any necessary type coercion
512  * using \ref Internal::match_types */
514 
515 /** Return a boolean expression that tests whether an expression is
516  * greater than a constant integer. Coerces the integer to the type of
517  * the expression. Errors if the integer is not representable in that
518  * type. */
519 Expr operator>(Expr a, int b);
520 
521 /** Return a boolean expression that tests whether a constant integer is
522  * greater than an expression. Coerces the integer to the type of
523  * the expression. Errors if the integer is not representable in that
524  * type. */
525 Expr operator>(int a, Expr b);
526 
527 /** Return a boolean expression that tests whether the first argument
528  * is less than the second, after doing any necessary type coercion
529  * using \ref Internal::match_types */
531 
532 /** Return a boolean expression that tests whether an expression is
533  * less than a constant integer. Coerces the integer to the type of
534  * the expression. Errors if the integer is not representable in that
535  * type. */
536 Expr operator<(Expr a, int b);
537 
538 /** Return a boolean expression that tests whether a constant integer is
539  * less than an expression. Coerces the integer to the type of
540  * the expression. Errors if the integer is not representable in that
541  * type. */
542 Expr operator<(int a, Expr b);
543 
544 /** Return a boolean expression that tests whether the first argument
545  * is less than or equal to the second, after doing any necessary type
546  * coercion using \ref Internal::match_types */
548 
549 /** Return a boolean expression that tests whether an expression is
550  * less than or equal to a constant integer. Coerces the integer to
551  * the type of the expression. Errors if the integer is not
552  * representable in that type. */
553 Expr operator<=(Expr a, int b);
554 
555 /** Return a boolean expression that tests whether a constant integer
556  * is less than or equal to an expression. Coerces the integer to the
557  * type of the expression. Errors if the integer is not representable
558  * in that type. */
559 Expr operator<=(int a, Expr b);
560 
561 /** Return a boolean expression that tests whether the first argument
562  * is greater than or equal to the second, after doing any necessary
563  * type coercion using \ref Internal::match_types */
565 
566 /** Return a boolean expression that tests whether an expression is
567  * greater than or equal to a constant integer. Coerces the integer to
568  * the type of the expression. Errors if the integer is not
569  * representable in that type. */
570 Expr operator>=(const Expr &a, int b);
571 
572 /** Return a boolean expression that tests whether a constant integer
573  * is greater than or equal to an expression. Coerces the integer to the
574  * type of the expression. Errors if the integer is not representable
575  * in that type. */
576 Expr operator>=(int a, const Expr &b);
577 
578 /** Return a boolean expression that tests whether the first argument
579  * is equal to the second, after doing any necessary type coercion
580  * using \ref Internal::match_types */
582 
583 /** Return a boolean expression that tests whether an expression is
584  * equal to a constant integer. Coerces the integer to the type of the
585  * expression. Errors if the integer is not representable in that
586  * type. */
587 Expr operator==(Expr a, int b);
588 
589 /** Return a boolean expression that tests whether a constant integer
590  * is equal to an expression. Coerces the integer to the type of the
591  * expression. Errors if the integer is not representable in that
592  * type. */
593 Expr operator==(int a, Expr b);
594 
595 /** Return a boolean expression that tests whether the first argument
596  * is not equal to the second, after doing any necessary type coercion
597  * using \ref Internal::match_types */
599 
600 /** Return a boolean expression that tests whether an expression is
601  * not equal to a constant integer. Coerces the integer to the type of
602  * the expression. Errors if the integer is not representable in that
603  * type. */
604 Expr operator!=(Expr a, int b);
605 
606 /** Return a boolean expression that tests whether a constant integer
607  * is not equal to an expression. Coerces the integer to the type of
608  * the expression. Errors if the integer is not representable in that
609  * type. */
610 Expr operator!=(int a, Expr b);
611 
612 /** Returns the logical and of the two arguments */
614 
615 /** Logical and of an Expr and a bool. Either returns the Expr or an
616  * Expr representing false, depending on the bool. */
617 // @{
618 Expr operator&&(Expr a, bool b);
619 Expr operator&&(bool a, Expr b);
620 // @}
621 
622 /** Returns the logical or of the two arguments */
624 
625 /** Logical or of an Expr and a bool. Either returns the Expr or an
626  * Expr representing true, depending on the bool. */
627 // @{
628 Expr operator||(Expr a, bool b);
629 Expr operator||(bool a, Expr b);
630 // @}
631 
632 /** Returns the logical not of the argument */
634 
635 /** Returns an expression representing the greater of the two
636  * arguments, after doing any necessary type coercion using
637  * \ref Internal::match_types. Vectorizes cleanly on most platforms
638  * (with the exception of integer types on x86 without SSE4). */
640 
641 /** Returns an expression representing the greater of an expression
642  * and a constant integer. The integer is coerced to the type of the
643  * expression. Errors if the integer is not representable as that
644  * type. Vectorizes cleanly on most platforms (with the exception of
645  * integer types on x86 without SSE4). */
646 Expr max(Expr a, int b);
647 
648 /** Returns an expression representing the greater of a constant
649  * integer and an expression. The integer is coerced to the type of
650  * the expression. Errors if the integer is not representable as that
651  * type. Vectorizes cleanly on most platforms (with the exception of
652  * integer types on x86 without SSE4). */
653 Expr max(int a, Expr b);
654 
655 inline Expr max(float a, Expr b) {
656  return max(Expr(a), std::move(b));
657 }
658 inline Expr max(Expr a, float b) {
659  return max(std::move(a), Expr(b));
660 }
661 
662 /** Returns an expression representing the greatest of a list of
663  * expressions, after doing any necessary type coercion using
664  * \ref Internal::match_types. Vectorizes cleanly on most platforms
665  * (with the exception of integer types on x86 without SSE4).
666  * The expressions are folded from right ie. max(.., max(.., ..)).
667  * The arguments can be any mix of types but must all be convertible to Expr. */
668 template<typename A, typename B, typename C, typename... Rest,
669  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
670 inline Expr max(A &&a, B &&b, C &&c, Rest &&...rest) {
671  return max(std::forward<A>(a), max(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
672 }
673 
675 
676 /** Returns an expression representing the lesser of an expression
677  * and a constant integer. The integer is coerced to the type of the
678  * expression. Errors if the integer is not representable as that
679  * type. Vectorizes cleanly on most platforms (with the exception of
680  * integer types on x86 without SSE4). */
681 Expr min(Expr a, int b);
682 
683 /** Returns an expression representing the lesser of a constant
684  * integer and an expression. The integer is coerced to the type of
685  * the expression. Errors if the integer is not representable as that
686  * type. Vectorizes cleanly on most platforms (with the exception of
687  * integer types on x86 without SSE4). */
688 Expr min(int a, Expr b);
689 
690 inline Expr min(float a, Expr b) {
691  return min(Expr(a), std::move(b));
692 }
693 inline Expr min(Expr a, float b) {
694  return min(std::move(a), Expr(b));
695 }
696 
697 /** Returns an expression representing the least of a list of
698  * expressions, after doing any necessary type coercion using
699  * \ref Internal::match_types. Vectorizes cleanly on most platforms
700  * (with the exception of integer types on x86 without SSE4).
701  * The expressions are folded from right ie. min(.., min(.., ..)).
702  * The arguments can be any mix of types but must all be convertible to Expr. */
703 template<typename A, typename B, typename C, typename... Rest,
704  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
705 inline Expr min(A &&a, B &&b, C &&c, Rest &&...rest) {
706  return min(std::forward<A>(a), min(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
707 }
708 
709 /** Operators on floats treats those floats as Exprs. Making these
710  * explicit prevents implicit float->int casts that might otherwise
711  * occur. */
712 // @{
713 inline Expr operator+(Expr a, float b) {
714  return std::move(a) + Expr(b);
715 }
716 inline Expr operator+(float a, Expr b) {
717  return Expr(a) + std::move(b);
718 }
719 inline Expr operator-(Expr a, float b) {
720  return std::move(a) - Expr(b);
721 }
722 inline Expr operator-(float a, Expr b) {
723  return Expr(a) - std::move(b);
724 }
725 inline Expr operator*(Expr a, float b) {
726  return std::move(a) * Expr(b);
727 }
728 inline Expr operator*(float a, Expr b) {
729  return Expr(a) * std::move(b);
730 }
731 inline Expr operator/(Expr a, float b) {
732  return std::move(a) / Expr(b);
733 }
734 inline Expr operator/(float a, Expr b) {
735  return Expr(a) / std::move(b);
736 }
737 inline Expr operator%(Expr a, float b) {
738  return std::move(a) % Expr(b);
739 }
740 inline Expr operator%(float a, Expr b) {
741  return Expr(a) % std::move(b);
742 }
743 inline Expr operator>(Expr a, float b) {
744  return std::move(a) > Expr(b);
745 }
746 inline Expr operator>(float a, Expr b) {
747  return Expr(a) > std::move(b);
748 }
749 inline Expr operator<(Expr a, float b) {
750  return std::move(a) < Expr(b);
751 }
752 inline Expr operator<(float a, Expr b) {
753  return Expr(a) < std::move(b);
754 }
755 inline Expr operator>=(Expr a, float b) {
756  return std::move(a) >= Expr(b);
757 }
758 inline Expr operator>=(float a, Expr b) {
759  return Expr(a) >= std::move(b);
760 }
761 inline Expr operator<=(Expr a, float b) {
762  return std::move(a) <= Expr(b);
763 }
764 inline Expr operator<=(float a, Expr b) {
765  return Expr(a) <= std::move(b);
766 }
767 inline Expr operator==(Expr a, float b) {
768  return std::move(a) == Expr(b);
769 }
770 inline Expr operator==(float a, Expr b) {
771  return Expr(a) == std::move(b);
772 }
773 inline Expr operator!=(Expr a, float b) {
774  return std::move(a) != Expr(b);
775 }
776 inline Expr operator!=(float a, Expr b) {
777  return Expr(a) != std::move(b);
778 }
779 // @}
780 
781 /** Clamps an expression to lie within the given bounds. The bounds
782  * are type-cast to match the expression. Vectorizes as well as min/max. */
783 Expr clamp(Expr a, const Expr &min_val, const Expr &max_val);
784 
785 /** Returns the absolute value of a signed integer or floating-point
786  * expression. Vectorizes cleanly. Unlike in C, abs of a signed
787  * integer returns an unsigned integer of the same bit width. This
788  * means that abs of the most negative integer doesn't overflow. */
790 
791 /** Return the absolute difference between two values. Vectorizes
792  * cleanly. Returns an unsigned value of the same bit width. There are
793  * various ways to write this yourself, but they contain numerous
794  * gotchas and don't always compile to good code, so use this
795  * instead. */
797 
798 /** Returns an expression similar to the ternary operator in C, except
799  * that it always evaluates all arguments. If the first argument is
800  * true, then return the second, else return the third. Typically
801  * vectorizes cleanly, but benefits from SSE41 or newer on x86. */
802 Expr select(Expr condition, Expr true_value, Expr false_value);
803 
804 /** A multi-way variant of select similar to a switch statement in C,
805  * which can accept multiple conditions and values in pairs. Evaluates
806  * to the first value for which the condition is true. Returns the
807  * final value if all conditions are false. */
808 template<typename... Args,
809  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Args...>::value>::type * = nullptr>  // SFINAE guard: this overload exists only when all_are_convertible<Expr, Args...>::value is true
810 inline Expr select(Expr c0, Expr v0, Expr c1, Expr v1, Args &&...args) {
811  return select(std::move(c0), std::move(v0), select(std::move(c1), std::move(v1), std::forward<Args>(args)...));  // (c0, v0) is peeled off; everything from c1 onwards is folded into a nested select used as the false value
812 }
813 
814 /** Equivalent of ternary select(), but taking/returning tuples. If the condition is
815  * a Tuple, it must match the size of the true and false Tuples. */
816 // @{
817 Tuple select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
818 Tuple select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
819 // @}
820 
821 /** Equivalent of multiway select(), but taking/returning tuples. If the condition is
822  * a Tuple, it must match the size of the true and false Tuples. */
823 // @{
824 template<typename... Args>
825 inline Tuple select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {  // Tuple-valued conditions
826  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));  // the chain starting at c1 is selected first and becomes the false value for (c0, v0)
827 }
828 template<typename... Args>
829 inline Tuple select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {  // Expr-valued conditions
830  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));  // same nesting as the Tuple-condition overload
831 }
832 // @}
833 
834 /** select applied to FuncRefs (e.g. select(x < 100, f(x), g(x))) is assumed to
835  * return an Expr. A runtime error is produced if this is applied to
836  * tuple-valued Funcs. In that case you should explicitly cast the second and
837  * third args to Tuple to remove the ambiguity. */
838 // @{
839 Expr select(const Expr &condition, const FuncRef &true_value, const FuncRef &false_value);
840 template<typename... Args>
841 inline Expr select(const Expr &c0, const FuncRef &v0, const Expr &c1, const FuncRef &v1, Args &&...args) {
842  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));  // the nested select over (c1, v1, args...) supplies the false value for (c0, v0)
843 }
844 // @}
845 
846 /** Oftentimes we want to pack a list of expressions with the same type
847  * into a channel dimension, e.g.,
848  * img(x, y, c) = select(c == 0, 100, // Red
849  * c == 1, 50, // Green
850  * 25); // Blue
851  * This is tedious when the list is long. The following functions
852  * provide convenient syntax that allows one to write:
853  * img(x, y, c) = mux(c, {100, 50, 25});
854  *
855  * As with the select equivalent, if the first argument (the index) is
856  * out of range, the expression evaluates to the last value.
857  */
858 // @{
859 Expr mux(const Expr &id, const std::initializer_list<Expr> &values);
860 Expr mux(const Expr &id, const std::vector<Expr> &values);
861 Expr mux(const Expr &id, const Tuple &values);
862 Expr mux(const Expr &id, const std::initializer_list<FuncRef> &values);
863 Tuple mux(const Expr &id, const std::initializer_list<Tuple> &values);
864 Tuple mux(const Expr &id, const std::vector<Tuple> &values);
865 // @}
866 
867 /** Return the sine of a floating-point expression. If the argument is
868  * not floating-point, it is cast to Float(32). Does not vectorize
869  * well. */
871 
872 /** Return the arcsine of a floating-point expression. If the argument
873  * is not floating-point, it is cast to Float(32). Does not vectorize
874  * well. */
876 
877 /** Return the cosine of a floating-point expression. If the argument
878  * is not floating-point, it is cast to Float(32). Does not vectorize
879  * well. */
881 
882 /** Return the arccosine of a floating-point expression. If the
883  * argument is not floating-point, it is cast to Float(32). Does not
884  * vectorize well. */
886 
887 /** Return the tangent of a floating-point expression. If the argument
888  * is not floating-point, it is cast to Float(32). Does not vectorize
889  * well. */
891 
892 /** Return the arctangent of a floating-point expression. If the
893  * argument is not floating-point, it is cast to Float(32). Does not
894  * vectorize well. */
896 
897 /** Return the angle of a floating-point gradient. If the argument is
898  * not floating-point, it is cast to Float(32). Does not vectorize
899  * well. */
901 
902 /** Return the hyperbolic sine of a floating-point expression. If the
903  * argument is not floating-point, it is cast to Float(32). Does not
904  * vectorize well. */
906 
907 /** Return the hyperbolic arcsine of a floating-point expression. If
908  * the argument is not floating-point, it is cast to Float(32). Does
909  * not vectorize well. */
911 
912 /** Return the hyperbolic cosine of a floating-point expression. If
913  * the argument is not floating-point, it is cast to Float(32). Does
914  * not vectorize well. */
916 
917 /** Return the hyperbolic arccosine of a floating-point expression.
918  * If the argument is not floating-point, it is cast to
919  * Float(32). Does not vectorize well. */
921 
922 /** Return the hyperbolic tangent of a floating-point expression. If
923  * the argument is not floating-point, it is cast to Float(32). Does
924  * not vectorize well. */
926 
927 /** Return the hyperbolic arctangent of a floating-point expression.
928  * If the argument is not floating-point, it is cast to
929  * Float(32). Does not vectorize well. */
931 
932 /** Return the square root of a floating-point expression. If the
933  * argument is not floating-point, it is cast to Float(32). Typically
934  * vectorizes cleanly. */
936 
937 /** Return the square root of the sum of the squares of two
938  * floating-point expressions. If the argument is not floating-point,
939  * it is cast to Float(32). Vectorizes cleanly. */
940 Expr hypot(const Expr &x, const Expr &y);
941 
942 /** Return the exponential of a floating-point expression. If the
943  * argument is not floating-point, it is cast to Float(32). For
944  * Float(64) arguments, this calls the system exp function, and does
945  * not vectorize well. For Float(32) arguments, this function is
946  * vectorizable, does the right thing for extremely small or extremely
947  * large inputs, and is accurate up to the last bit of the
948  * mantissa. Vectorizes cleanly. */
950 
951 /** Return the logarithm of a floating-point expression. If the
952  * argument is not floating-point, it is cast to Float(32). For
953  * Float(64) arguments, this calls the system log function, and does
954  * not vectorize well. For Float(32) arguments, this function is
955  * vectorizable, does the right thing for inputs <= 0 (returns -inf or
956  * nan), and is accurate up to the last bit of the
957  * mantissa. Vectorizes cleanly. */
959 
960 /** Return one floating point expression raised to the power of
961  * another. The type of the result is given by the type of the first
962  * argument. If the first argument is not a floating-point type, it is
963  * cast to Float(32). For Float(32), cleanly vectorizable, and
964  * accurate up to the last few bits of the mantissa. Gets worse when
965  * approaching overflow. Vectorizes cleanly. */
967 
968 /** Evaluate the error function erf. Only available for
969  * Float(32). Accurate up to the last three bits of the
970  * mantissa. Vectorizes cleanly. */
971 Expr erf(const Expr &x);
972 
973 /** Fast vectorizable approximation to some trigonometric functions for Float(32).
974  * Absolute approximation error is less than 1e-5. */
975 // @{
976 Expr fast_sin(const Expr &x);
977 Expr fast_cos(const Expr &x);
978 // @}
979 
980 /** Fast approximate cleanly vectorizable log for Float(32). Returns
981  * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
982  * mantissa. Vectorizes cleanly. */
983 Expr fast_log(const Expr &x);
984 
985 /** Fast approximate cleanly vectorizable exp for Float(32). Returns
986  * nonsense for inputs that would overflow or underflow. Typically
987  * accurate up to the last 5 bits of the mantissa. Gets worse when
988  * approaching overflow. Vectorizes cleanly. */
989 Expr fast_exp(const Expr &x);
990 
991 /** Fast approximate cleanly vectorizable pow for Float(32). Returns
992  * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
993  * mantissa for typical exponents. Gets worse when approaching
994  * overflow. Vectorizes cleanly. */
996 
997 /** Fast approximate inverse for Float(32). Corresponds to the rcpps
998  * instruction on x86, and the vrecpe instruction on ARM. Vectorizes
999  * cleanly. Note that this can produce slightly different results
1000  * across different implementations of the same architecture (e.g. AMD vs Intel),
1001  * even when strict_float is enabled. */
1003 
1004 /** Fast approximate inverse square root for Float(32). Corresponds to
1005  * the rsqrtps instruction on x86, and the vrsqrte instruction on
1006  * ARM. Vectorizes cleanly. Note that this can produce slightly different results
1007  * across different implementations of the same architecture (e.g. AMD vs Intel),
1008  * even when strict_float is enabled. */
1010 
1011 /** Return the greatest whole number less than or equal to a
1012  * floating-point expression. If the argument is not floating-point,
1013  * it is cast to Float(32). The return value is still in floating
1014  * point, despite being a whole number. Vectorizes cleanly. */
1016 
1017 /** Return the least whole number greater than or equal to a
1018  * floating-point expression. If the argument is not floating-point,
1019  * it is cast to Float(32). The return value is still in floating
1020  * point, despite being a whole number. Vectorizes cleanly. */
1022 
1023 /** Return the whole number closest to a floating-point expression. If the
1024  * argument is not floating-point, it is cast to Float(32). The return value is
1025  * still in floating point, despite being a whole number. On ties, we round
1026  * towards the nearest even integer. Note that this is not the same as
1027  * std::round in C, which rounds away from zero. On platforms without a native
1028  * instruction for this, it is emulated, and may be more expensive than
1029  * cast<int>(x + 0.5f) or similar. */
1031 
1032 /** Return the integer part of a floating-point expression. If the argument is
1033  * not floating-point, it is cast to Float(32). The return value is still in
1034  * floating point, despite being a whole number. Vectorizes cleanly. */
1036 
1037 /** Returns true if the argument is a Not a Number (NaN). Requires a
1038  * floating point argument. Vectorizes cleanly.
1039  * Note that the Expr passed in will be evaluated in strict_float mode,
1040  * regardless of whether strict_float mode is enabled in the current Target. */
1042 
1043 /** Returns true if the argument is Inf or -Inf. Requires a
1044  * floating point argument. Vectorizes cleanly.
1045  * Note that the Expr passed in will be evaluated in strict_float mode,
1046  * regardless of whether strict_float mode is enabled in the current Target. */
1048 
1049 /** Returns true if the argument is a finite value (ie, neither NaN nor Inf).
1050  * Requires a floating point argument. Vectorizes cleanly.
1051  * Note that the Expr passed in will be evaluated in strict_float mode,
1052  * regardless of whether strict_float mode is enabled in the current Target. */
1054 
1055 /** Return the fractional part of a floating-point expression. If the argument
1056  * is not floating-point, it is cast to Float(32). The return value has the
1057  * same sign as the original expression. Vectorizes cleanly. */
1058 Expr fract(const Expr &x);
1059 
1060 /** Reinterpret the bits of one value as another type. */
1062 
1063 template<typename T>
1065  return reinterpret(type_of<T>(), std::move(e));
1066 }
1067 
1068 /** Return the bitwise and of two expressions (which need not have the
1069  * same type). The result type is the wider of the two expressions.
1070  * Only integral types are allowed and both expressions must be signed
1071  * or both must be unsigned. */
1073 
1074 /** Return the bitwise and of an expression and an integer. The type
1075  * of the result is the type of the expression argument. */
1076 // @{
1077 Expr operator&(Expr x, int y);
1078 Expr operator&(int x, Expr y);
1079 // @}
1080 
1081 /** Return the bitwise or of two expressions (which need not have the
1082  * same type). The result type is the wider of the two expressions.
1083  * Only integral types are allowed and both expressions must be signed
1084  * or both must be unsigned. */
1086 
1087 /** Return the bitwise or of an expression and an integer. The type of
1088  * the result is the type of the expression argument. */
1089 // @{
1090 Expr operator|(Expr x, int y);
1091 Expr operator|(int x, Expr y);
1092 // @}
1093 
1094 /** Return the bitwise xor of two expressions (which need not have the
1095  * same type). The result type is the wider of the two expressions.
1096  * Only integral types are allowed and both expressions must be signed
1097  * or both must be unsigned. */
1099 
1100 /** Return the bitwise xor of an expression and an integer. The type
1101  * of the result is the type of the expression argument. */
1102 // @{
1103 Expr operator^(Expr x, int y);
1104 Expr operator^(int x, Expr y);
1105 // @}
1106 
1107 /** Return the bitwise not of an expression. */
1109 
1110 /** Shift the bits of an integer value left. This is actually less
1111  * efficient than multiplying by 2^n, because Halide's optimization
1112  * passes understand multiplication, and will compile it to
1113  * shifting. This operator is only for if you really really need bit
1114  * shifting (e.g. because the exponent is a run-time parameter). The
1115  * type of the result is equal to the type of the first argument. Both
1116  * arguments must have integer type. */
1117 // @{
1120 // @}
1121 
1122 /** Shift the bits of an integer value right. Does sign extension for
1123  * signed integers. This is less efficient than dividing by a power of
1124  * two. Halide's definition of division (always round to negative
1125  * infinity) means that all divisions by powers of two get compiled to
1126  * bit-shifting, and Halide's optimization routines understand
1127  * division and can work with it. The type of the result is equal to
1128  * the type of the first argument. Both arguments must have integer
1129  * type. */
1130 // @{
1133 // @}
1134 
1135 /** Linear interpolate between the two values according to a weight.
1136  * \param zero_val The result when weight is 0
1137  * \param one_val The result when weight is 1
1138  * \param weight The interpolation amount
1139  *
1140  * Both zero_val and one_val must have the same type. All types are
1141  * supported, including bool.
1142  *
1143  * The weight is treated as its own type and must be float or an
1144  * unsigned integer type. It is scaled to the bit-size of the type of
1145  * zero_val and one_val if they are integer, or converted to float if they are
1146  * float. Integer weights are converted to float via division by the
1147  * full-range value of the weight's type. Floating-point weights used
1148  * to interpolate between integer values must be between 0.0f and
1149  * 1.0f, and an error may be signaled if it is not provably so. (clamp
1150  * operators can be added to provide proof. Currently an error is only
1151  * signalled for constant weights.)
1152  *
1153  * For integer linear interpolation, out of range values cannot be
1154  * represented. In particular, weights that are conceptually less than
1155  * 0 or greater than 1.0 are not representable. As such the result is
1156  * always between zero_val and one_val (inclusive of course). For lerp with
1157  * floating-point values and floating-point weight, the full range of
1158  * a float is valid, however underflow and overflow can still occur.
1159  *
1160  * Ordering is not required between zero_val and one_val:
1161  * lerp(42, 69, .5f) == lerp(69, 42, .5f) == 56
1162  *
1163  * Results for integer types are for exactly rounded arithmetic. As
1164  * such, there are cases where 16-bit and float differ because 32-bit
1165  * floating-point (float) does not have enough precision to produce
1166  * the exact result. (Likely true for 32-bit integer
1167  * vs. double-precision floating-point as well.)
1168  *
1169  * At present, double precision and 64-bit integers are not supported.
1170  *
1171  * Generally, lerp will vectorize as if it were an operation on a type
1172  * twice the bit size of the inferred type for zero_val and one_val.
1173  *
1174  * Some examples:
1175  * \code
1176  *
1177  * // Since Halide does not have direct type declarations, casts
1178  * // below are used to indicate the types of the parameters.
1179  * // Such casts are not required or expected in actual code where types
1180  * // are inferred.
1181  *
1182  * lerp(cast<float>(x), cast<float>(y), cast<float>(w)) ->
1183  * x * (1.0f - w) + y * w
1184  *
1185  * lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(w)) ->
1186  * cast<uint8_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1187  * cast<uint8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1188  *
1189  * // Note addition in Halide promoted uint8_t + int8_t to int16_t already,
1190  * // the outer cast is added for clarity.
1191  * lerp(cast<uint8_t>(x), cast<int8_t>(y), cast<uint8_t>(w)) ->
1192  * cast<int16_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1193  * cast<int8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1194  *
1195  * lerp(cast<int8_t>(x), cast<int8_t>(y), cast<float>(w)) ->
1196  * cast<int8_t>(cast<int8_t>(x) * (1.0f - cast<float>(w)) +
1197  * cast<int8_t>(y) * cast<float>(w))
1198  *
1199  * \endcode
1200  * */
1201 Expr lerp(Expr zero_val, Expr one_val, Expr weight);
1202 
1203 /** Count the number of set bits in an expression. */
1205 
1206 /** Count the number of leading zero bits in an expression. If the expression is
1207  * zero, the result is the number of bits in the type. */
1209 
1210 /** Count the number of trailing zero bits in an expression. If the expression is
1211  * zero, the result is the number of bits in the type. */
1213 
1214 /** Divide two integers, rounding towards zero. This is the typical
1215  * behavior of most hardware architectures, which differs from
1216  * Halide's division operator, which is Euclidean (rounds towards
1217  * -infinity). Will throw a runtime error if y is zero, or if y is -1
1218  * and x is the minimum signed integer. */
1220 
1221 /** Compute the remainder of dividing two integers, when division is
1222  * rounding toward zero. This is the typical behavior of most hardware
1223  * architectures, which differs from Halide's mod operator, which is
1224  * Euclidean (produces the remainder when division rounds towards
1225  * -infinity). Will throw a runtime error if y is zero. */
1227 
1228 /** Return a random variable representing a uniformly distributed
1229  * float in the half-open interval [0.0f, 1.0f). For random numbers of
1230  * other types, use lerp with a random float as the last parameter.
1231  *
1232  * Optionally takes a seed.
1233  *
1234  * Note that:
1235  \code
1236  Expr x = random_float();
1237  Expr y = x + x;
1238  \endcode
1239  *
1240  * is very different to
1241  *
1242  \code
1243  Expr y = random_float() + random_float();
1244  \endcode
1245  *
1246  * The first doubles a random variable, and the second adds two
1247  * independent random variables.
1248  *
1249  * A given random variable takes on a unique value that depends
1250  * deterministically on the pure variables of the function they belong
1251  * to, the identity of the function itself, and which definition of
1252  * the function it is used in. They are, however, shared across tuple
1253  * elements.
1254  *
1255  * This function vectorizes cleanly.
1256  */
1258 
1259 /** Return a random variable representing a uniformly distributed
1260  * unsigned 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1262 
1263 /** Return a random variable representing a uniformly distributed
1264  * 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1266 
1267 /** Create an Expr that prints out its value whenever it is
1268  * evaluated. It also prints out everything else in the arguments
1269  * list, separated by spaces. This can include string literals. */
1270 //@{
1271 Expr print(const std::vector<Expr> &values);
1272 
1273 template<typename... Args>
1274 inline HALIDE_NO_USER_CODE_INLINE Expr print(Expr a, Args &&...args) {
1275  std::vector<Expr> collected_args = {std::move(a)};  // the leading Expr is always the first entry
1276  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);  // helper declared elsewhere; presumably appends each remaining argument to the vector
1277  return print(collected_args);  // defer to the std::vector<Expr> overload
1278 }
1279 //@}
1280 
1281 /** Create an Expr that prints whenever it is evaluated, provided that
1282  * the condition is true. */
1283 // @{
1284 Expr print_when(Expr condition, const std::vector<Expr> &values);
1285 
1286 template<typename... Args>
1287 inline HALIDE_NO_USER_CODE_INLINE Expr print_when(Expr condition, Expr a, Args &&...args) {
1288  std::vector<Expr> collected_args = {std::move(a)};  // the first value to print is always the first entry
1289  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);  // helper declared elsewhere; presumably appends each remaining argument to the vector
1290  return print_when(std::move(condition), collected_args);  // defer to the std::vector<Expr> overload
1291 }
1292 
1293 // @}
1294 
1295 /** Create an Expr that guarantees a precondition.
1296  * If 'condition' is true, the return value is equal to the first Expr.
1297  * If 'condition' is false, halide_error() is called, and the return value
1298  * is arbitrary. Any additional arguments after the first Expr are stringified
1299  * and passed as a user-facing message to halide_error(), similar to print().
1300  *
1301  * Note that this essentially *always* inserts a runtime check into the
1302  * generated code (except when the condition can be proven at compile time);
1303  * as such, it should be avoided inside inner loops, except for debugging
1304  * or testing purposes. Note also that it does not vectorize cleanly (vector
1305  * values will be scalarized for the check).
1306  *
1307  * However, using this to make assertions about (say) input values
1308  * can be useful, both in terms of correctness and (potentially) in terms
1309  * of code generation, e.g.
1310  \code
1311  Param<int> p;
1312  Expr y = require(p > 0, p);
1313  \endcode
1314  * will allow the optimizer to assume positive, nonzero values for y.
1315  */
1316 // @{
1317 Expr require(Expr condition, const std::vector<Expr> &values);
1318 
1319 template<typename... Args>
1320 inline HALIDE_NO_USER_CODE_INLINE Expr require(Expr condition, Expr value, Args &&...args) {
1321  std::vector<Expr> collected_args = {std::move(value)};  // 'value' is always the first entry
1322  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);  // helper declared elsewhere; presumably appends the remaining (message) arguments to the vector
1323  return require(std::move(condition), collected_args);  // defer to the std::vector<Expr> overload
1324 }
1325 // @}
1326 
1327 /** Return an undef value of the given type. Halide skips stores that
1328  * depend on undef values, so you can use this to mean "do not modify
1329  * this memory location". This is an escape hatch that can be used for
1330  * several things:
1331  *
1332  * You can define a reduction with no pure step, by setting the pure
1333  * step to undef. Do this only if you're confident that the update
1334  * steps are sufficient to correctly fill in the domain.
1335  *
1336  * For a tuple-valued reduction, you can write an update step that
1337  * only updates some tuple elements.
1338  *
1339  * You can define a single-stage pipeline that only has update steps,
1340  * and depends on the values already in the output buffer.
1341  *
1342  * Use this feature with great caution, as you can use it to load from
1343  * uninitialized memory.
1344  */
1346 
1347 template<typename T>
1348 inline Expr undef() {
1349  return undef(type_of<T>());  // convenience form: the type is taken from the template argument T via type_of<T>()
1350 }
1351 
1352 namespace Internal {
1353 
1354 /** Return an expression that should never be evaluated. Expressions
1355  * that depend on unreachable values are also unreachable, and
1356  * statements that execute unreachable expressions are also considered
1357  * unreachable. */
1359 
1360 template<typename T>
1361 inline Expr unreachable() {
1362  return unreachable(type_of<T>());  // convenience form: forwards the type produced by type_of<T>() to the overload that takes a type argument
1363 }
1364 
1365 } // namespace Internal
1366 
1367 /** Control the values used in the memoization cache key for memoize.
1368  * Normally parameters and other external dependencies are
1369  * automatically inferred and added to the cache key. The memoize_tag
1370  * operator allows computing one expression and using either the
1371  * computed value, or one or more other expressions in the cache key
1372  * instead of the parameter dependencies of the computation. The
1373  * single argument version is completely safe in that the cache key
1374  * will use the actual computed value -- it is difficult or impossible
1375  * to produce erroneous caching this way. The more-than-one argument
1376  * version allows generating cache keys that do not uniquely identify
1377  * the computation and thus can result in caching errors.
1378  *
1379  * A potential use for the single argument version is to handle a
1380  * floating-point parameter that is quantized to a small
1381  * integer. Multiple values of the float will produce the same integer
1382  * and moving the caching to using the integer for the key is more
1383  * efficient.
1384  *
1385  * The main use for the more-than-one argument version is to provide
1386  * cache key information for Handles and ImageParams, which otherwise
1387  * are not allowed inside compute_cached operations. E.g. when passing
1388  * a group of parameters to an external array function via a Handle,
1389  * memoize_tag can be used to isolate the actual values used by that
1390  * computation. If an ImageParam is a constant image with a persistent
1391  * digest, memoize_tag can be used to key computations using that image
1392  * on the digest. */
1393 // @{
1394 template<typename... Args>
1395 inline HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args) {
1396  std::vector<Expr> collected_args{std::forward<Args>(args)...};  // the optional cache-key expressions; empty for the single-argument form
1397  return Internal::memoize_tag_helper(std::move(result), collected_args);  // helper declared elsewhere receives the result and the key list
1398 }
1399 // @}
1400 
1401 /** Expressions tagged with this intrinsic are considered to be part
1402  * of the steady state of some loop with a nasty beginning and end
1403  * (e.g. a boundary condition). When Halide encounters likely
1404  * intrinsics, it splits the containing loop body into three, and
1405  * tries to simplify down all conditions that lead to the likely. For
1406  * example, given the expression: select(x < 1, bar, x > 10, bar,
1407  * likely(foo)), Halide will split the loop over x into portions where
1408  * x < 1, 1 <= x <= 10, and x > 10.
1409  *
1410  * You're unlikely to want to call this directly. You probably want to
1411  * use the boundary condition helpers in the BoundaryConditions
1412  * namespace instead.
1413  */
1415 
1416 /** Equivalent to likely, but only triggers a loop partitioning if
1417  * found in an innermost loop. */
1419 
1420 /** Cast an expression to the halide type corresponding to the C++
1421  * type T. As part of the cast, clamp to the minimum and maximum
1422  * values of the result type. */
1423 template<typename T>
1425  return saturating_cast(type_of<T>(), std::move(e));
1426 }
1427 
1428 /** Cast an expression to a new type, clamping to the minimum and
1429  * maximum values of the result type. */
1431 
1432 /** Makes a best effort attempt to preserve IEEE floating-point
1433  * semantics in evaluating an expression. May not be implemented for
1434  * all backends. (E.g. it is difficult to do this for C++ code
1435  * generation as it depends on the compiler flags used to compile the
1436  * generated code.) */
1438 
1439 /** Create an Expr that promises another Expr is clamped but does
1440  * not generate code to check the assertion or modify the value. No
1441  * attempt is made to prove the bound at compile time. (If it is
1442  * proved false as a result of something else, an error might be
1443  * generated, but it is also possible the compiler will crash.) The
1444  * promised bound is used in bounds inference so it will allow
1445  * satisfying bounds checks as well as possibly aiding optimization.
1446  *
1447  * unsafe_promise_clamped returns its first argument, the Expr 'value'
1448  *
1449  * This is a very easy way to make Halide generate erroneous code if
1450  * the promised bound is not kept. Use sparingly when there is no
1451  * other way to convey the information to the compiler and it is
1452  * required for a valuable optimization.
1453  *
1454  * Unsafe promises can be checked by turning on
1455  * Target::CheckUnsafePromises. This is intended for debugging only.
1456  */
1457 Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1458 
1459 namespace Internal {
1460 /**
1461  * FOR INTERNAL USE ONLY.
1462  *
1463  * An entirely unchecked version of unsafe_promise_clamped, used
1464  * inside the compiler as an annotation of the known bounds of an Expr
1465  * when it has proved something is bounded and wants to record that
1466  * fact for later passes (notably bounds inference) to exploit. This
1467  * gets introduced by GuardWithIf tail strategies, because the bounds
1468  * machinery has a hard time exploiting if statement conditions.
1469  *
1470  * Unlike unsafe_promise_clamped, this expression is
1471  * context-dependent, because 'value' might be statically bounded at
1472  * some point in the IR (e.g. due to a containing if statement), but
1473  * not elsewhere.
1474  *
1475  * This intrinsic always evaluates to its first argument ('value'). If this value is
1476  * used by a side-effecting operation and it is outside the range specified
1477  * by its second and third arguments ('min' and 'max'), behavior is undefined. The compiler can
1478  * therefore assume that the value is within the range given and optimize
1479  * accordingly. Note that this permits promise_clamped to evaluate to
1480  * something outside of the range, provided that this value is not used.
1481  *
1482  * Note that this produces an intrinsic that is marked as 'pure' and thus is
1483  * allowed to be hoisted, etc.; thus, extra care must be taken with its use.
1484  **/
1485 Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1486 } // namespace Internal
1487 
1488 /** Scatter and gather are used for update definition which must store
1489  * multiple values to distinct locations at the same time. The
1490  * multiple expressions on the right-hand-side are bundled together
1491  * into a "gather", which must match a "scatter" with the same number
1492  * of arguments on the left-hand-side. For example, to store the
1493  * values 1 and 2 to the locations (x, y, 3) and (x, y, 4),
1494  * respectively:
1495  *
1496 \code
1497 f(x, y, scatter(3, 4)) = gather(1, 2);
1498 \endcode
1499  *
1500  * The result of gather or scatter can be treated as an
1501  * expression. Any containing operations on it can be assumed to
1502  * distribute over the elements. If two gather expressions are
1503  * combined with an arithmetic operator (e.g. added), they combine
1504  * element-wise. The following example stores the values 2 * x, 2 * y,
1505  * and 2 * c to the locations (x + 1, y, c), (x, y + 3, c), and (x, y,
1506  * c + 2) respectively:
1507  *
1508 \code
1509 f(x + scatter(1, 0, 0), y + scatter(0, 3, 0), c + scatter(0, 0, 2)) = 2 * gather(x, y, c);
1510 \endcode
1511 *
1512 * Repeated values in the scatter cause multiple stores to the same
1513 * location. The stores happen in order from left to right, so the
1514 * rightmost value wins. The following code is equivalent to f(x) = 5
1515 *
1516 \code
1517 f(scatter(x, x)) = gather(3, 5);
1518 \endcode
1519 *
1520 * Gathers are most useful for algorithms which require in-place
1521 * swapping or permutation of multiple elements, or other kinds of
1522 * in-place mutations that require loading multiple inputs, doing some
1523 * operations to them jointly, then storing them again. The following
1524 * update definition swaps the values of f at locations 3 and 5 if an
1525 * input parameter p is true:
1526 *
1527 \code
1528 f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
1529 \endcode
1530 *
1531 * For more examples of the use of scatter and gather, see
1532 * test/correctness/multiple_scatter.cpp
1533 *
1534 * It is not currently possible to use scatter and gather to write an
1535 * update definition in which the *number* of values loaded or stored
1536 * varies, as the size of the scatter/gather packet must be fixed at
1537 * compile-time. A workaround is to make the unwanted extra operations
1538 * a redundant copy of the last operation, which will be
1539 * dead-code-eliminated by the compiler. For example, the following
1540 * update definition swaps the values at locations 3 and 5 when the
1541 * parameter p is true, and rotates the values at locations 1, 2, and 3
1542 * when it is false. The load from 3 and store to 5 will be redundantly
1543 * repeated:
1544 *
1545 \code
1546 f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
1547 \endcode
1548 *
1549 * Note that in the p == true case, we redundantly load from 3 and write
1550 * to 5 twice.
1551 */
1552 //@{
1553 Expr scatter(const std::vector<Expr> &args);
1554 Expr gather(const std::vector<Expr> &args);
1555 
1556 template<typename... Args>
1557 Expr scatter(const Expr &e, Args &&...args) {
      // Variadic convenience form: bundle e and the remaining arguments into a
      // std::vector<Expr> and defer to the vector overload of scatter.
      // The vector is constructed explicitly because a bare braced list {e}
      // (the single-argument case) is an exact match for this template's own
      // const Expr & parameter, so the call would select this overload again
      // rather than the std::vector<Expr> one and recurse without bound.
1558  return scatter(std::vector<Expr>{e, std::forward<Args>(args)...});
1559 }
1560 
1561 template<typename... Args>
1562 Expr gather(const Expr &e, Args &&...args) {
      // Variadic convenience form: bundle e and the remaining arguments into a
      // std::vector<Expr> and defer to the vector overload of gather.
      // The vector is constructed explicitly because a bare braced list {e}
      // (the single-argument case) is an exact match for this template's own
      // const Expr & parameter, so the call would select this overload again
      // rather than the std::vector<Expr> one and recurse without bound.
1563  return gather(std::vector<Expr>{e, std::forward<Args>(args)...});
1564 }
1565 // @}
1566 
1567 /** Extract a contiguous subsequence of the bits of 'e', starting at the bit
1568  * index given by 'lsb', where zero is the least-significant bit, returning a
1569  * value of type 't'. Any out-of-range bits requested are filled with zeros.
1570  *
1571  * extract_bits is especially useful when one wants to load a small vector of a
1572  * wide type, and treat it as a larger vector of a smaller type. For example,
1573  * loading a vector of 32 uint8 values from a uint32 Func can be done as
1574  * follows:
1575 \code
1576 f8(x) = extract_bits<uint8_t>(f32(x/4), 8*(x%4));
1577 f8.align_bounds(x, 4).vectorize(x, 32);
1578 \endcode
1579  * Note that the align_bounds call is critical so that the narrow Exprs are
1580  * aligned to the wider Exprs. This makes the x%4 term collapse to a
1581  * constant. If f8 is an output Func, then constraining the min value of x to be
1582  * a known multiple of four would also be sufficient, e.g. via:
1583 \code
1584 f8.output_buffer().dim(0).set_min(0);
1585 \endcode
1586  *
1587  * See test/correctness/extract_concat_bits.cpp for a complete example. */
1588 // @{
1589 Expr extract_bits(Type t, const Expr &e, const Expr &lsb);
1590 
1591 template<typename T>
1592 Expr extract_bits(const Expr &e, const Expr &lsb) {
      // Convenience form taking the result type as a template argument:
      // translate T with type_of<T>() and pass it, together with e and lsb
      // unchanged, to the extract_bits declaration that takes an explicit Type.
1593  return extract_bits(type_of<T>(), e, lsb);
1594 }
1595 // @}
1596 
1597 /** Given a number of Exprs of the same type, concatenate their bits producing a
1598  * single Expr of the same type code of the input but with more bits. The
1599  * number of arguments must be a power of two.
1600  *
1601  * concat_bits is especially useful when one wants to treat a Func containing
1602  * values of a narrow type as a Func containing fewer values of a wider
1603  * type. For example, the following code reinterprets vectors of 32 uint8 values
1604  * as a vector of 8 uint32s:
1605  *
1606 \code
1607 f32(x) = concat_bits({f8(4*x), f8(4*x + 1), f8(4*x + 2), f8(4*x + 3)});
1608 f32.vectorize(x, 8);
1609 \endcode
1610  *
1611  * See test/correctness/extract_concat_bits.cpp for a complete example.
1612  */
1613 Expr concat_bits(const std::vector<Expr> &e);
1614 
1615 /** Below is a collection of intrinsics for fixed-point programming. Most of
1616  * them can be expressed via other means, but this is more natural for some, as
1617  * it avoids ghost widened intermediates that don't (or shouldn't) actually show
1618  * up in codegen, and doesn't rely on pattern-matching inside the compiler to
1619  * succeed to get good instruction selection.
1620  *
1621  * The semantics of each call are defined in terms of a non-existent 'widen' and
1622  * 'narrow' operators, which stand in for casts that double or halve the
1623  * bit-width of a type respectively.
1624  */
1625 
1626 /** Compute a + widen(b). */
1628 
1629 /** Compute a * widen(b). */
1631 
1632 /** Compute a - widen(b). */
1634 
1635 /** Compute widen(a) + widen(b). */
1637 
1638 /** Compute widen(a) * widen(b). a and b may have different signedness, in which
1639  * case the result is signed. */
1641 
1642 /** Compute widen(a) - widen(b). The result is always signed. */
1644 
1645 /** Compute widen(a) << b. */
1646 //@{
1649 //@}
1650 
1651 /** Compute widen(a) >> b. */
1652 //@{
1655 //@}
1656 
1657 /** Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
1658  * When b is positive indicating a left shift, the rounding term is zero. */
1659 //@{
1662 //@}
1663 
1664 /** Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
1665  * When b is negative indicating a left shift, the rounding term is zero. */
1666 //@{
1669 //@}
1670 
1671 /** Compute saturating_narrow(widen(a) + widen(b)) */
1673 
1674 /** Compute saturating_narrow(widen(a) - widen(b)) */
1676 
1677 /** Compute narrow((widen(a) + widen(b)) / 2) */
1679 
1680 /** Compute narrow((widen(a) + widen(b) + 1) / 2) */
1682 
1683 /** Compute narrow((widen(a) - widen(b)) / 2) */
1685 
1686 /** Compute saturating_narrow(shift_right(widening_mul(a, b), q)) */
1687 //@{
1690 //@}
1691 
1692 /** Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q)) */
1693 //@{
1696 //@}
1697 
1698 /** Return a boolean Expr for the corresponding field of the Target
1699  * being used during lowering; they can be useful in writing library
1700  * code without having to plumb a Target through call sites, so that you
1701  * can do things like
1702  \code
1703  Expr e = select(target_arch_is(Target::ARM), something, something_else);
1704  \endcode
1705  * Note that this doesn't do any checking at runtime to verify that the Target
1706  * is valid for the current hardware configuration.
1707  */
1708 //@{
1712 //@}
1713 
1714 /** Return the bit width of the Target used during lowering; this can be useful
1715  * in writing library code without having to plumb a Target through call sites,
1716  * so that you can do things like
1717  \code
1718  Expr e = select(target_bits() == 32, something, something_else);
1719  \endcode
1720  * Note that this doesn't do any checking at runtime to verify that the Target
1721  * is valid for the current hardware configuration.
1722  */
1724 
1725 /** Return the natural vector width for the given Type for the Target
1726  * being used during lowering; this can be useful in writing library
1727  * code without having to plumb a Target through call sites, so that you
1728  * can do things like
1729  \code
1730  f.vectorize(x, target_natural_vector_size(Float(32)));
1731  \endcode
1732  * Note that this doesn't do any checking at runtime to verify that the Target
1733  * is valid for the current hardware configuration.
1734  */
1735 //@{
1737 template<typename data_t>
1739  return target_natural_vector_size(type_of<data_t>());
1740 }
1741 //@}
1742 
1743 } // namespace Halide
1744 
1745 #endif
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
Defines the structure that describes a Halide target.
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Definition: Util.h:47
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Definition: Func.h:491
Create a small array of Exprs for defining and calling functions with multiple outputs.
Definition: Tuple.h:18
Expr make_one(Type t)
Construct the representation of one in the given type.
T div_imp(T a, T b)
Definition: IROperator.h:273
bool is_const_zero(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to zero (in all lanes,...
Expr memoize_tag_helper(Expr result, const std::vector< Expr > &cache_key_values)
Expr make_zero(Type t)
Construct the representation of zero in the given type.
bool is_negative_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly less than zero (in all lanes,...
bool is_undef(const Expr &e)
Is the expression an undef.
Expr requirement_failed_error(Expr condition, const std::vector< Expr > &args)
Expr make_two(Type t)
Construct the representation of two in the given type.
std::optional< double > as_const_float(const Expr &e)
If an expression is a FloatImm or a Broadcast of a FloatImm, return its value.
Expr unreachable(Type t=Int(32))
Return an expression that should never be evaluated.
void check_representable(Type t, int64_t val)
Check if a constant value can be correctly represented as the given type.
Expr halide_erf(const Expr &a)
bool is_const_one(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to one (in all lanes,...
void match_types(Expr &a, Expr &b)
Coerce the two expressions to have the same type, using C-style casting rules.
double div_imp< double >(double a, double b)
Definition: IROperator.h:314
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b)
Expr halide_exp(const Expr &a)
Expr make_const(Type t, int64_t val)
Construct an immediate of the given type from any numeric C++ type.
bool is_positive_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly greater than zero (in all lanes...
Expr const_true(int lanes=1)
Construct the constant boolean true.
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b)
bool is_signed_integer_overflow(const Expr &expr)
Check if an expression is a signed_integer_overflow.
T mod_imp(T a, T b)
Implementations of division and mod that are specific to Halide.
Definition: IROperator.h:252
std::optional< uint64_t > as_const_uint(const Expr &e)
If an expression is a UIntImm or a Broadcast of a UIntImm, return its value.
void reset_random_counters()
Reset the counters used for random-number seeds in random_float/int/uint.
Expr halide_log(const Expr &a)
Halide's vectorizable transcendentals.
bool is_pure(const Expr &e)
Does the expression 1) Take on the same value no matter where it appears in a Stmt,...
void split_into_ands(const Expr &cond, std::vector< Expr > &result)
Split a boolean condition into vector of ANDs.
Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max)
FOR INTERNAL USE ONLY.
bool is_no_op(const Stmt &s)
Is the statement a no-op (which we represent as either an undefined Stmt, or as an Evaluate node of a...
Expr unwrap_tags(const Expr &e)
If the expression is a tag helper call, remove it and return the tagged expression.
float div_imp< float >(float a, float b)
Definition: IROperator.h:310
Expr lossless_negate(const Expr &x)
Attempt to negate x without introducing new IR and without overflow.
Expr strided_ramp_base(const Expr &e, int stride=1)
If e is a ramp expression with stride, default 1, return the base, otherwise undefined.
Expr remove_promises(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to promise_clamped() and unsaf...
std::optional< int64_t > as_const_int(const Expr &e)
If an expression is an IntImm or a Broadcast of an IntImm, return its value.
Expr const_false(int lanes=1)
Construct the constant boolean false.
double mod_imp< double >(double a, double b)
Definition: IROperator.h:304
Expr lossless_cast(Type t, Expr e, std::map< Expr, ConstantInterval, ExprCompare > *cache=nullptr)
Attempt to cast an expression to a smaller type while provably not losing information.
Expr make_bool(bool val, int lanes=1)
Construct a boolean constant from a C++ boolean value.
HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector< Expr > &args)
Definition: IROperator.h:348
std::optional< int > is_const_power_of_two_integer(const Expr &e)
Is the expression a constant integer power of two.
void match_types_bitwise(Expr &a, Expr &b, const char *op_name)
Asserts that both expressions are integer types and are either both signed or both unsigned.
float mod_imp< float >(float a, float b)
Definition: IROperator.h:298
Expr raise_to_integer_power(Expr a, int64_t b)
Raise an expression to an integer power by repeatedly multiplying it by itself.
Expr make_signed_integer_overflow(Type type)
Construct a unique signed_integer_overflow Expr.
bool is_const(const Expr &e)
Is the expression either an IntImm, a FloatImm, a StringImm, or a Cast of the same,...
Expr remove_likelies(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to likely() and likely_if_inne...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
auto operator>=(const Other &a, const GeneratorParam< T > &b) -> decltype(a >=(T) b)
Greater than or equal comparison between GeneratorParam<T> and any type that supports operator>= with...
Definition: Generator.h:1104
Expr log(Expr x)
Return the logarithm of a floating-point expression.
Expr operator>>(Expr x, Expr y)
Shift the bits of an integer value right.
Expr ceil(Expr x)
Return the least whole number greater than or equal to a floating-point expression.
Expr widen_right_add(Expr a, Expr b)
Below is a collection of intrinsics for fixed-point programming.
Expr rounding_shift_right(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args)
Control the values used in the memoization cache key for memoize.
Definition: IROperator.h:1395
Expr fast_log(const Expr &x)
Fast approximate cleanly vectorizable log for Float(32).
Expr count_leading_zeros(Expr x)
Count the number of leading zero bits in an expression.
Expr reinterpret(Type t, Expr e)
Reinterpret the bits of one value as another type.
Expr saturating_add(Expr a, Expr b)
Compute saturating_narrow(widen(a) + widen(b))
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
Definition: Generator.h:1130
Expr fast_cos(const Expr &x)
Expr random_uint(Expr seed=Expr())
Return a random variable representing a uniformly distributed unsigned 32-bit integer.
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr fract(const Expr &x)
Return the fractional part of a floating-point expression.
Expr halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b)) / 2)
auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T.
Definition: Generator.h:1091
Expr widening_shift_right(Expr a, Expr b)
Compute widen(a) >> b.
auto operator*(const Other &a, const GeneratorParam< T > &b) -> decltype(a *(T) b)
Multiplication between GeneratorParam<T> and any type that supports operator* with T.
Definition: Generator.h:1039
Expr trunc(Expr x)
Return the integer part of a floating-point expression.
Expr halving_sub(Expr a, Expr b)
Compute narrow((widen(a) - widen(b)) / 2)
auto operator||(const Other &a, const GeneratorParam< T > &b) -> decltype(a||(T) b)
Logical or between GeneratorParam<T> and any type that supports operator|| with T.
Definition: Generator.h:1173
Expr acosh(Expr x)
Return the hyperbolic arccosine of a floating-point expression.
Expr fast_inverse(Expr x)
Fast approximate inverse for Float(32).
Expr target_arch_is(Target::Arch arch)
Return a boolean Expr for the corresponding field of the Target being used during lowering; they can ...
Expr asin(Expr x)
Return the arcsine of a floating-point expression.
Expr rounding_shift_left(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
Expr & operator/=(Expr &a, Expr b)
Modify the first expression to be the ratio of two expressions, without changing its type.
auto operator-(const Other &a, const GeneratorParam< T > &b) -> decltype(a -(T) b)
Subtraction between GeneratorParam<T> and any type that supports operator- with T.
Definition: Generator.h:1026
Expr clamp(Expr a, const Expr &min_val, const Expr &max_val)
Clamps an expression to lie within the given bounds.
Expr hypot(const Expr &x, const Expr &y)
Return the square root of the sum of the squares of two floating-point expressions.
Expr popcount(Expr x)
Count the number of set bits in an expression.
Expr target_natural_vector_size(Type t)
Return the natural vector width for the given Type for the Target being used during lowering; this ca...
Expr saturating_sub(Expr a, Expr b)
Compute saturating_narrow(widen(a) - widen(b))
Expr gather(const std::vector< Expr > &args)
Expr print_when(Expr condition, const std::vector< Expr > &values)
Create an Expr that prints whenever it is evaluated, provided that the condition is true.
Expr widening_shift_left(Expr a, Expr b)
Compute widen(a) << b.
Expr pow(Expr x, Expr y)
Return one floating point expression raised to the power of another.
Expr operator&(Expr x, Expr y)
Return the bitwise and of two expressions (which need not have the same type).
auto operator!(const GeneratorParam< T > &a) -> decltype(!(T) a)
Not operator for GeneratorParam.
Definition: Generator.h:1245
Expr lerp(Expr zero_val, Expr one_val, Expr weight)
Linear interpolate between the two values according to a weight.
Expr atan2(Expr y, Expr x)
Return the angle of a floating-point gradient.
Expr random_float(Expr seed=Expr())
Return a random variable representing a uniformly distributed float in the half-open interval [0....
Expr sin(Expr x)
Return the sine of a floating-point expression.
Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max)
Create an Expr that promises another Expr is clamped but do not generate code to check the asser...
Expr rounding_halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b) + 1) / 2)
Expr extract_bits(Type t, const Expr &e, const Expr &lsb)
Extract a contiguous subsequence of the bits of 'e', starting at the bit index given by 'lsb',...
Expr concat_bits(const std::vector< Expr > &e)
Given a number of Exprs of the same type, concatenate their bits producing a single Expr of the same ...
Expr mux(const Expr &id, const std::initializer_list< Expr > &values)
Oftentimes we want to pack a list of expressions with the same type into a channel dimension,...
Expr cosh(Expr x)
Return the hyperbolic cosine of a floating-point expression.
std::ostream & operator<<(std::ostream &stream, const Expr &)
Emit an expression on an output stream (such as std::cout) in human-readable form.
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition: Type.h:541
Expr acos(Expr x)
Return the arccosine of a floating-point expression.
Expr fast_exp(const Expr &x)
Fast approximate cleanly vectorizable exp for Float(32).
Expr widening_add(Expr a, Expr b)
Compute widen(a) + widen(b).
Expr target_os_is(Target::OS os)
Expr cos(Expr x)
Return the cosine of a floating-point expression.
auto operator+(const Other &a, const GeneratorParam< T > &b) -> decltype(a+(T) b)
Addition between GeneratorParam<T> and any type that supports operator+ with T.
Definition: Generator.h:1013
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:597
Expr exp(Expr x)
Return the exponential of a floating-point expression.
Expr widen_right_mul(Expr a, Expr b)
Compute a * widen(b).
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
auto operator&&(const Other &a, const GeneratorParam< T > &b) -> decltype(a &&(T) b)
Logical and between GeneratorParam<T> and any type that supports operator&& with T.
Definition: Generator.h:1156
Expr fast_sin(const Expr &x)
Fast vectorizable approximation to some trigonometric functions for Float(32).
Expr undef(Type t)
Return an undef value of the given type.
Expr & operator-=(Expr &a, Expr b)
Modify the first expression to be the difference of two expressions, without changing its type.
Expr fast_pow(Expr x, Expr y)
Fast approximate cleanly vectorizable pow for Float(32).
auto operator%(const Other &a, const GeneratorParam< T > &b) -> decltype(a %(T) b)
Modulo between GeneratorParam<T> and any type that supports operator% with T.
Definition: Generator.h:1065
Expr round(Expr x)
Return the whole number closest to a floating-point expression.
Expr select(Expr condition, Expr true_value, Expr false_value)
Returns an expression similar to the ternary operator in C, except that it always evaluates all argum...
Expr count_trailing_zeros(Expr x)
Count the number of trailing zero bits in an expression.
Expr scatter(const std::vector< Expr > &args)
Scatter and gather are used for update definition which must store multiple values to distinct locati...
auto operator<=(const Other &a, const GeneratorParam< T > &b) -> decltype(a<=(T) b)
Less than or equal comparison between GeneratorParam<T> and any type that supports operator<= with T.
Definition: Generator.h:1117
Expr rounding_mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q))
Expr random_int(Expr seed=Expr())
Return a random variable representing a uniformly distributed 32-bit integer.
Expr mod_round_to_zero(Expr x, Expr y)
Compute the remainder of dividing two integers, when division is rounding toward zero.
Expr strict_float(Expr e)
Makes a best effort attempt to preserve IEEE floating-point semantics in evaluating an expression.
Expr widening_mul(Expr a, Expr b)
Compute widen(a) * widen(b).
auto operator>(const Other &a, const GeneratorParam< T > &b) -> decltype(a >(T) b)
Greater than comparison between GeneratorParam<T> and any type that supports operator> with T.
Definition: Generator.h:1078
Expr is_nan(Expr x)
Returns true if the argument is a Not a Number (NaN).
Expr asinh(Expr x)
Return the hyperbolic arcsine of a floating-point expression.
Expr sqrt(Expr x)
Return the square root of a floating-point expression.
Expr sinh(Expr x)
Return the hyperbolic sine of a floating-point expression.
Expr atan(Expr x)
Return the arctangent of a floating-point expression.
Expr operator|(Expr x, Expr y)
Return the bitwise or of two expressions (which need not have the same type).
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
Definition: Generator.h:1143
Expr target_bits()
Return the bit width of the Target used during lowering; this can be useful in writing library code w...
Expr & operator*=(Expr &a, Expr b)
Modify the first expression to be the product of two expressions, without changing its type.
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a)
Cast operators for ConstantIntervals.
Expr require(Expr condition, const std::vector< Expr > &values)
Create an Expr that guarantees a precondition.
Expr is_inf(Expr x)
Returns true if the argument is Inf or -Inf.
Expr is_finite(Expr x)
Returns true if the argument is a finite value (ie, neither NaN nor Inf).
Expr tanh(Expr x)
Return the hyperbolic tangent of a floating-point expression.
Expr likely_if_innermost(Expr e)
Equivalent to likely, but only triggers a loop partitioning if found in an innermost loop.
Expr & operator+=(Expr &a, Expr b)
Modify the first expression to be the sum of two expressions, without changing its type.
Expr atanh(Expr x)
Return the hyperbolic arctangent of a floating-point expression.
Expr tan(Expr x)
Return the tangent of a floating-point expression.
Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a)
Expr fast_inverse_sqrt(Expr x)
Fast approximate inverse square root for Float(32).
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Expr mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(shift_right(widening_mul(a, b), q))
auto operator/(const Other &a, const GeneratorParam< T > &b) -> decltype(a/(T) b)
Division between GeneratorParam<T> and any type that supports operator/ with T.
Definition: Generator.h:1052
Expr abs(Expr a)
Returns the absolute value of a signed integer or floating-point expression.
Expr widen_right_sub(Expr a, Expr b)
Compute a - widen(b).
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:600
Expr floor(Expr x)
Return the greatest whole number less than or equal to a floating-point expression.
Expr div_round_to_zero(Expr x, Expr y)
Divide two integers, rounding towards zero.
Expr widening_sub(Expr a, Expr b)
Compute widen(a) - widen(b).
Expr likely(Expr e)
Expressions tagged with this intrinsic are considered to be part of the steady state of some loop wit...
Expr operator~(Expr x)
Return the bitwise not of an expression.
Expr erf(const Expr &x)
Evaluate the error function erf.
Expr target_has_feature(Target::Feature feat)
Expr operator^(Expr x, Expr y)
Return the bitwise xor of two expressions (which need not have the same type).
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
A fragment of Halide syntax.
Definition: Expr.h:258
A builder to help create Exprs representing halide_buffer_t structs (e.g.
Definition: IROperator.h:223
std::vector< Expr > strides
Definition: IROperator.h:228
std::vector< Expr > extents
Definition: IROperator.h:228
std::vector< Expr > mins
Definition: IROperator.h:228
A reference-counted handle to a statement node.
Definition: Expr.h:427
static constexpr bool value
Definition: IROperator.h:340
Feature
Optional features a target can have.
Definition: Target.h:83
Arch
The architecture used by the target.
Definition: Target.h:39
OS
The operating system used by the target.
Definition: Target.h:23
Types in the halide type system.
Definition: Type.h:283
HALIDE_ALWAYS_INLINE bool is_int() const
Is this type a signed integer type?
Definition: Type.h:435
HALIDE_ALWAYS_INLINE bool is_float() const
Is this type a floating point type (float or double).
Definition: Type.h:423
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition: Float16.h:17