Yet Another eXchange Tool  0.9.0
xt_idxstripes.c
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jörg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifdef HAVE_CONFIG_H
47 #include <config.h>
48 #endif
49 
50 #include <assert.h>
51 #include <limits.h>
52 #include <stdbool.h>
53 #include <stdlib.h>
54 #include <stdio.h>
55 #include <string.h>
56 
57 #include "xt/xt_core.h"
58 #include "xt_arithmetic_util.h"
59 #include "xt_arithmetic_long.h"
60 #include "xt/xt_idxlist.h"
61 #include "xt_idxlist_internal.h"
62 #include "xt/xt_idxempty.h"
63 #include "xt/xt_idxvec.h"
64 #include "xt/xt_idxstripes.h"
65 #include "xt_idxstripes_internal.h"
66 #include "xt_stripe_util.h"
67 #include "xt/xt_mpi.h"
68 #include "xt/xt_sort.h"
69 #include "xt_idxlist_unpack.h"
70 #include "xt_cover.h"
71 #include "core/core.h"
72 #include "core/ppm_xfuncs.h"
73 #include "ensure_array_size.h"
74 #include "instr.h"
75 
76 #define MIN(a,b) (((a)<(b))?(a):(b))
77 #define MAX(a,b) (((a)>(b))?(a):(b))
78 
79 static void
81 
82 static size_t
84 
85 static void
86 idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
87  int *position, MPI_Comm comm);
88 
89 static Xt_idxlist
91 
92 static void
93 idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices);
94 
95 static const Xt_int *
97 
98 static void
99 idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe ** stripes,
100  int * num_stripes);
101 
102 static int
103 idxstripes_get_index_at_position(Xt_idxlist idxlist, int position,
104  Xt_int * index);
105 
106 static int
107 idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions,
108  int num, Xt_int *index,
109  Xt_int undef_idx);
110 static int
112  int num_stripes,
113  const struct Xt_stripe *stripes,
114  int *num_ext,
115  struct Xt_pos_ext **pos_ext,
116  int single_match_only);
117 
118 static int
120  int * position);
121 
122 static int
124  int * position, int offset);
125 
126 static Xt_int
128 
129 static Xt_int
131 
132 static const struct xt_idxlist_vtable idxstripes_vtable = {
134  .get_pack_size = idxstripes_get_pack_size,
135  .pack = idxstripes_pack,
136  .copy = idxstripes_copy,
137  .get_indices = idxstripes_get_indices,
138  .get_indices_const = idxstripes_get_indices_const,
139  .get_index_stripes = idxstripes_get_index_stripes,
140  .get_index_at_position = idxstripes_get_index_at_position,
141  .get_indices_at_positions = idxstripes_get_indices_at_positions,
142  .get_position_of_index = idxstripes_get_position_of_index,
143  .get_positions_of_indices = NULL,
144  .get_pos_exts_of_index_stripes = idxstripes_get_pos_exts_of_index_stripes,
145  .get_position_of_index_off = idxstripes_get_position_of_index_off,
146  .get_positions_of_indices_off = NULL,
147  .get_min_index = idxstripes_get_min_index,
148  .get_max_index = idxstripes_get_max_index,
149  .get_bounding_box = NULL,
150  .idxlist_pack_code = STRIPES,
151 };
152 
153 static MPI_Datatype stripe_dt;
154 
155 void
157 {
158  struct Xt_stripe stripe;
159 
160  MPI_Aint base_address, start_address, nstrides_address, stride_address;
161 
162  MPI_Get_address(&stripe, &base_address);
163  MPI_Get_address(&stripe.start, &start_address);
164  MPI_Get_address(&stripe.stride, &stride_address);
165  MPI_Get_address(&stripe.nstrides, &nstrides_address);
166 
167  enum { num_stripe_dt_elems = 3 };
168  int block_lengths[num_stripe_dt_elems] = {1,1,1};
169  MPI_Aint displacements[num_stripe_dt_elems]
170  = {start_address - base_address,
171  stride_address - base_address,
172  nstrides_address - base_address };
173  MPI_Datatype types[num_stripe_dt_elems] = { Xt_int_dt, Xt_int_dt, MPI_INT },
174  stripe_dt_unaligned;
175 
176  xt_mpi_call(MPI_Type_create_struct(num_stripe_dt_elems,
177  block_lengths, displacements, types,
178  &stripe_dt_unaligned), Xt_default_comm);
179  xt_mpi_call(MPI_Type_create_resized(stripe_dt_unaligned, 0,
180  (MPI_Aint)sizeof(stripe),
181  &stripe_dt), Xt_default_comm);
182  xt_mpi_call(MPI_Type_free(&stripe_dt_unaligned), Xt_default_comm);
183  xt_mpi_call(MPI_Type_commit(&stripe_dt), Xt_default_comm);
184 }
185 
186 void
188 {
189  xt_mpi_call(MPI_Type_free(&stripe_dt), Xt_default_comm);
190 }
191 
193 
195  struct Xt_stripe_minmax range; /* minimal and maximal position of stripe */
196  int position; /* position of stripe this range
197  * corresponds to (permutation
198  * obtained from sorting) */
199  int inv_position; /* position in stripes_sort when
200  * indexed with unsorted index */
201 };
202 
203 enum {
206 
209 };
210 
212 
213  struct Xt_idxlist_ parent;
214 
218  int flags;
219 
221  struct Xt_stripes_sort stripes_sort[];
222 };
223 
224 static int compare_xtstripes(const void * a_, const void * b_)
225 {
226  const struct Xt_stripes_sort *restrict a = a_, *restrict b = b_;
227  return ((a->range.min > b->range.min) -
228  (a->range.min < b->range.min));
229 }
230 
231 static Xt_idxlist
233  const char *caller)
234 {
235  const struct Xt_stripe *restrict stripes = idxstripes->stripes;
236  struct Xt_stripes_sort *restrict stripes_sort = idxstripes->stripes_sort;
237  Xt_int min, max;
238  {
239  struct Xt_stripe_minmax stripe_range = xt_stripe2minmax(stripes[0]);
240  stripes_sort[0].range = stripe_range;
241  stripes_sort[0].position = 0;
242  min = stripe_range.min;
243  max = stripe_range.max;
244  }
245  size_t num_stripes = (size_t)idxstripes->num_stripes;
246  long long num_indices = (long long)stripes[0].nstrides;
247  int sign_err = stripes[0].nstrides;
248  int have_zero_stride = stripes[0].stride == 0;
249  for (size_t i = 1; i < num_stripes; ++i) {
250  struct Xt_stripe_minmax stripe_range = xt_stripe2minmax(stripes[i]);
251  stripes_sort[i].range = stripe_range;
252  stripes_sort[i].position = (int)i;
253  min = MIN(stripe_range.min, min);
254  max = MAX(stripe_range.max, max);
255  num_indices += (long long)stripes[i].nstrides;
256  sign_err |= stripes[i].nstrides;
257  have_zero_stride |= stripes[i].stride == 0;
258  }
259  /* test sign bit */
260  if (sign_err < 0) {
261  static const char template[] = "ERROR: %s called with invalid stripes";
262  size_t buf_size = sizeof (template) - 2 + strlen(caller);
263  char *msg = xmalloc(buf_size);
264  snprintf(msg, buf_size, template, caller);
265  die(msg);
266  }
267  if (num_indices > 0) {
268  assert(num_indices <= INT_MAX);
269  qsort(stripes_sort, num_stripes, sizeof (*stripes_sort), compare_xtstripes);
270  stripes_sort[stripes_sort[0].position].inv_position = 0;
271  int stripes_do_overlap = 0;
272  for (size_t i = 1; i < num_stripes; ++i) {
273  stripes_do_overlap
274  |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
275  stripes_sort[stripes_sort[i].position].inv_position = (int)i;
276  }
277 
278  idxstripes->flags = stripes_do_overlap << stripes_do_overlap_bit
279  | have_zero_stride << stripes_some_have_zero_stride_bit;
280  idxstripes->min_index = min;
281  idxstripes->max_index = max;
282  idxstripes->index_array_cache = NULL;
283  Xt_idxlist_init(&idxstripes->parent, &idxstripes_vtable, (int)num_indices);
284  return (Xt_idxlist)idxstripes;
285  } else {
286  free(idxstripes);
287  return xt_idxempty_new();
288  }
289 }
290 
291 
293 xt_idxstripes_new(struct Xt_stripe const * stripes, int num_stripes) {
294  INSTR_DEF(instr,"xt_idxstripes_new")
295  INSTR_START(instr);
296  // ensure that yaxt is initialized
297  assert(xt_initialized());
298 
299  Xt_idxlist result;
300 
301  if (num_stripes > 0) {
302  size_t header_size = ((sizeof (struct Xt_idxstripes_)
303  + (sizeof (struct Xt_stripes_sort)
304  * (size_t)num_stripes)
305  + sizeof (struct Xt_stripe) - 1)
306  / sizeof (struct Xt_stripe))
307  * sizeof (struct Xt_stripe),
308  body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
309  Xt_idxstripes idxstripes = xmalloc(header_size + body_size);
310  {
311  struct Xt_stripe *stripes_assign
312  = (struct Xt_stripe *)(void *)((unsigned char *)idxstripes
313  + header_size);
314  idxstripes->stripes = stripes_assign;
315  idxstripes->num_stripes
316  = (int)xt_stripes_merge_copy((size_t)num_stripes,
317  stripes_assign,
318  stripes, false);
319  }
320  result = idxstripes_aggregate(idxstripes, __func__);
321  } else
322  result = xt_idxempty_new();
323  INSTR_STOP(instr);
324  return result;
325 }
326 
329  // ensure that yaxt is initialized
330  assert(xt_initialized());
331  int num_stripes;
332  struct Xt_stripe *stripes;
333  xt_idxlist_get_index_stripes(idxlist_src, &stripes, &num_stripes);
334  Xt_idxlist result;
335  if (num_stripes > 0) {
336  /* make room for header and ... */
337  size_t header_size = ((sizeof (struct Xt_idxstripes_)
338  + (sizeof (struct Xt_stripes_sort)
339  * (size_t)num_stripes)
340  + sizeof (struct Xt_stripe) - 1)
341  / sizeof (struct Xt_stripe))
342  * sizeof (struct Xt_stripe),
343  body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
344  Xt_idxstripes idxstripes = xrealloc(stripes, header_size + body_size);
345  struct Xt_stripe *stripes_moved
346  = (struct Xt_stripe *)(void *)((unsigned char *)idxstripes + header_size);
347  /* ... move stripes to their final position */
348  memmove(stripes_moved, idxstripes, sizeof (*stripes) * (size_t)num_stripes);
349  idxstripes->stripes = stripes_moved;
350  idxstripes->num_stripes = num_stripes;
351  result = idxstripes_aggregate(idxstripes, __func__);
352  } else
353  result = xt_idxempty_new();
354  return result;
355 }
356 
357 
358 
360 xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
361 {
362  Xt_idxlist result;
363 
364  if (num_stripes > 0) {
365  size_t header_size = ((sizeof (struct Xt_idxstripes_)
366  + ((size_t)num_stripes
367  * sizeof (struct Xt_stripes_sort))
368  + sizeof (struct Xt_stripe) - 1)
369  / sizeof (struct Xt_stripe))
370  * sizeof (struct Xt_stripe);
371  Xt_idxstripes idxstripes = xmalloc(header_size);
372  idxstripes->num_stripes = num_stripes;
373  /* this assignment is safe because no methods modify the pointee */
374  idxstripes->stripes = (struct Xt_stripe *)stripes;
375  result = idxstripes_aggregate(idxstripes, __func__);
376  } else
377  result = xt_idxempty_new();
378  return result;
379 }
380 
381 
382 static void
384 
385  if (data == NULL) return;
386 
387  Xt_idxstripes stripes = (Xt_idxstripes)data;
388 
389  free(stripes->index_array_cache);
390  free(stripes);
391 }
392 
393 static size_t
395 
396  Xt_idxstripes stripes = (Xt_idxstripes)data;
397 
398  int size_header, size_stripes = 0;
399 
400  xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
401  if (stripes->num_stripes)
402  xt_mpi_call(MPI_Pack_size(stripes->num_stripes, stripe_dt, comm,
403  &size_stripes), comm);
404 
405  return (size_t)size_header + (size_t)size_stripes;
406 }
407 
408 static void
409 idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
410  int *position, MPI_Comm comm) {
411  INSTR_DEF(instr,"idxstripes_pack")
412  INSTR_START(instr);
413 
414  assert(data);
415  Xt_idxstripes stripes = (Xt_idxstripes)data;
416  int header[2] = { STRIPES, stripes->num_stripes };
417 
418  xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
419  buffer_size, position, comm), comm);
420  int num_stripes = stripes->num_stripes;
421  if (num_stripes)
422  xt_mpi_call(MPI_Pack(stripes->stripes, num_stripes, stripe_dt,
423  buffer, buffer_size, position, comm), comm);
424  INSTR_STOP(instr);
425 }
426 
427 Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position,
428  MPI_Comm comm) {
429 
430  INSTR_DEF(instr,"xt_idxstripes_unpack")
431  INSTR_START(instr);
432 
433  int num_stripes;
434  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
435  &num_stripes, 1, MPI_INT, comm), comm);
436 
437  Xt_idxlist result;
438  if (num_stripes) {
439  size_t header_size = ((sizeof (struct Xt_idxstripes_)
440  + ((size_t)num_stripes
441  * sizeof (struct Xt_stripes_sort))
442  + sizeof (struct Xt_stripe) - 1)
443  / sizeof (struct Xt_stripe))
444  * sizeof (struct Xt_stripe),
445  body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
446  Xt_idxstripes idxstripes = xmalloc(header_size + body_size);
447  idxstripes->num_stripes = num_stripes;
448  {
449  struct Xt_stripe *stripes_assign
450  = (struct Xt_stripe *)(void *)((unsigned char *)idxstripes
451  + header_size);
452  idxstripes->stripes = stripes_assign;
453  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, stripes_assign,
454  num_stripes, stripe_dt, comm),comm);
455  }
456  result = idxstripes_aggregate(idxstripes, __func__);
457  } else
458  result = xt_idxempty_new();
459 
460  INSTR_STOP(instr);
461  return result;
462 }
463 
464 static Xt_idxlist
466  Xt_idxstripes idxstripes_dst) {
467  INSTR_DEF(instr,"compute_intersection_fallback")
468  INSTR_START(instr);
469 
470  Xt_idxlist idxvec_from_stripes_src
471  = xt_idxvec_from_stripes_new(idxstripes_src->stripes,
472  idxstripes_src->num_stripes);
473  Xt_idxlist idxvec_from_stripes_dst
474  = xt_idxvec_from_stripes_new(idxstripes_dst->stripes,
475  idxstripes_dst->num_stripes);
476 
477  Xt_idxlist intersection
478  = xt_idxlist_get_intersection(idxvec_from_stripes_src,
479  idxvec_from_stripes_dst);
480 
481  xt_idxlist_delete(idxvec_from_stripes_src);
482  xt_idxlist_delete(idxvec_from_stripes_dst);
483  INSTR_STOP(instr);
484  return intersection;
485 }
486 
487 
488 struct extended_gcd {
490 };
491 
492 /* computes egcd of two positive integers a and b such that
493  egcd.gcd == egcd.u * a + egcd.v * b */
494 static inline struct extended_gcd extended_gcd(Xt_int a, Xt_int b) {
495  Xt_int t = 1, u = 1, v = 0, s = 0;
496  while (b>0)
497  {
498  Xt_int q = (Xt_int)(a / b);
499  Xt_int prev_a = a;
500  a = b;
501  b = (Xt_int)(prev_a - q * b);
502  Xt_int prev_u = u;
503  u = s;
504  s = (Xt_int)(prev_u - q * s);
505  Xt_int prev_v = v;
506  v = t;
507  t = (Xt_int)(prev_v - q * t);
508  }
509  return (struct extended_gcd){ .gcd = a, .u = u, .v = v };
510 }
511 
512 /* This implementation uses the method outlined in
513  * James M. Stichnoth,
514  * Efficient Compilation of Array Statements for Private Memory Multicomputers
515  * February, 1993 CMU-CS-93-109
516  */
517 static struct Xt_stripe
518 get_stripe_intersection(struct Xt_stripe stripe_a,
519  struct Xt_stripe stripe_b) {
520 
521  INSTR_DEF(instr,"get_stripe_intersection")
522  INSTR_START(instr);
523 
524  struct Xt_bounded_stripe {
525  Xt_int min, max, stride, representative;
526  };
527 
528  struct Xt_bounded_stripe bsa, bsb, bsi;
529 
530  Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
531  stride_zero_mask_b = (stripe_b.stride != 0) - 1;
532  {
533  Xt_int mask = Xt_isign_mask(stripe_a.stride);
534  bsa.min = (Xt_int)(stripe_a.start
535  + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
536  bsa.max = (Xt_int)(stripe_a.start
537  + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
538  }
539  bsa.representative = stripe_a.start;
540  {
541  Xt_int mask = Xt_isign_mask(stripe_b.stride);
542  bsb.min = (Xt_int)(stripe_b.start
543  + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
544  bsb.max = (Xt_int)(stripe_b.start
545  + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
546  }
547  bsb.representative = stripe_b.start;
548 
549  bsa.stride = (Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
550  | (stride_zero_mask_a & 1));
551  bsb.stride = (Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
552  | (stride_zero_mask_b & 1));
553 
554  /* FIXME: adjust the one further away from 0 */
555  /* adjust second representative to minimize difference to first representative */
556  Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
557 #ifdef XT_LONG
558  Xt_long start_diff = (Xt_long)stripe_a.start - stripe_b.start;
559  bsb.representative
560  = (Xt_int)(bsb.representative
561  + (start_diff/abs_stride_b
562  + (start_diff%abs_stride_b > abs_stride_b/2))
563  * abs_stride_b);
564  start_diff = (Xt_long)bsb.representative - bsa.representative;
565 #else
566  Xt_long start_diff = xiisub(stripe_a.start, stripe_b.start);
567  Xt_idiv start_diff_abs_stride_b_div
568  = xlidivu(xlabs(start_diff), (Xt_uint)abs_stride_b);
569  bsb.representative
570  = (Xt_int)(bsb.representative
571  + (start_diff_abs_stride_b_div.quot
572  + start_diff_abs_stride_b_div.rem > abs_stride_b/2)
573  * abs_stride_b);
574  start_diff = xiisub(bsb.representative, bsa.representative);
575 #endif
576  Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
577  struct extended_gcd eg = extended_gcd(abs_stride_a, abs_stride_b);
578  bsi.min = MAX(bsa.min, bsb.min);
579  bsi.max = MIN(bsa.max, bsb.max);
580  /* we need to temporarily represent the product
581  * (bsb.representative - bsa.representative) * eg.u * abs_stride_a
582  * so we figure out first, if that product would overflow Xt_int
583  */
584  int stride_a_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_a),
585  stride_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_b) + stride_a_bits,
586  product_bits;
587  if (bsb.representative != bsa.representative)
588  product_bits =
589  xt_int_bits - xinlz((Xt_uint)XT_INT_ABS(eg.u))
590 #ifdef XT_LONG
591  + xt_int_bits - xinlz((Xt_uint)xlabs(start_diff))
592 #else
593  + xt_int_bits - xinlz(xlabs(start_diff).lo)
594 #endif
595  + stride_a_bits;
596  else
597  product_bits = 0;
598  int strides_mask, nstrides;
599  struct Xt_stripe intersection;
600  if (product_bits < xt_int_bits && stride_bits < xt_int_bits) {
601  Xt_int temp_stride = (Xt_int)((abs_stride_a * bsb.stride)/eg.gcd);
602  bsi.stride = (Xt_int)temp_stride;
603  Xt_int min_rep;
604  {
605  Xt_int some_rep
606  = (Xt_int)(bsa.representative
607  + ((bsb.representative - bsa.representative) * eg.u
608  * abs_stride_a / eg.gcd));
609  /* compute minimal bsi representative >= bsi.min */
610  Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
611  r_diff = (Xt_int)(bsi.min - some_rep),
612  steps = (Xt_int)(r_diff / abs_bsi_stride);
613  steps = (Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
614  min_rep = (Xt_int)(some_rep + steps * abs_bsi_stride);
615  bsi.representative = (Xt_int)min_rep;
616  }
617  nstrides = (int)((bsi.max - min_rep)/temp_stride + llsign(temp_stride));
618  int even_divide
619  = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
620  & (bsi.stride == temp_stride || abs(nstrides) == 1));
621  /* requires two's complement integers */
622  strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
623  & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
624  Xt_int max_rep
625  = (Xt_int)(min_rep + (nstrides - llsign(temp_stride)) * bsi.stride);
626  intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
627  } else {
628 #ifdef XT_LONG
629 #define ABS(x) (((x) < 0) ? -(x) : (x))
630  Xt_long temp_stride = xiimul(abs_stride_a, bsb.stride)/eg.gcd;
631  bsi.stride = (Xt_int)temp_stride;
632  Xt_long min_rep;
633  /* did the computation of bsi.stride remain in Xt_int range? */
634  bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
635  /* did the computation of min_rep remain in Xt_int range? */
636  bool min_rep_in_range;
637  {
638  Xt_long some_rep;
639  if (bsb.representative != bsa.representative) {
640  /* computation might generate huge intermediary values, needing 3
641  * times as many bits as Xt_int has, so we use long multiplication */
642  Xt_long temp_1 = start_diff * eg.u;
643  Xt_tword temp_2 = xlimulu(temp_1, (Xt_uint)abs_stride_a);
644  /* some_rep will be representable as Xt_long
645  * if dividing by eg.gcd results in a value in the range of Xt_long...*/
646  min_rep_in_range
647  /* ... that means either all bits in temp_2.hi are
648  * identical to the high bit of temp_2.midlo ... */
649  = (temp_2.hi
650  == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.midlo >> xt_int_bits))))
651  /* or the aggregate value of temp_2 divided by
652  * XT_LONG_MAX+1 is less than eg.gcd, instead of actually
653  * dividing, we can simply shift the uppermost bit of
654  * temp_2.midlo into temp_2.hi left-shifted by one */
655  | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_long)temp_2.midlo < 0)) < eg.gcd);
656  /* compute any representative */
657  some_rep
658  = (Xt_long)bsa.representative + ((Xt_long)temp_2.midlo / eg.gcd);
659  } else {
660  min_rep_in_range = true;
661  some_rep = bsa.representative;
662  }
663  /* compute minimal bsi representative >= bsi.min */
664  Xt_long abs_bsi_stride = ABS(temp_stride),
665  r_diff = bsi.min - some_rep,
666  steps = r_diff / abs_bsi_stride;
667  bool steps_in_range = xl_is_in_xt_int_range(steps);
668  steps = steps + (steps * abs_bsi_stride < r_diff);
669  min_rep = some_rep;
670  if (steps_in_range)
671  min_rep += steps * abs_bsi_stride;
672  min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
673  bsi.representative = (Xt_int)min_rep;
674  }
675  int min_rep_mask = ~((int)min_rep_in_range - 1);
676  nstrides = (int)((((bsi.max - min_rep)/temp_stride)+xlsign(temp_stride))
677  & min_rep_mask)
678  | (~min_rep_mask & (bsb.representative == bsa.representative));
679  int even_divide
680  = ((start_diff%eg.gcd == 0) & (bsi_stride_in_range || abs(nstrides) == 1));
681  /* requires two's complement integers */
682  strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
683  & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
684  Xt_int max_rep
685  = (Xt_int)(min_rep + (nstrides - xlsign(temp_stride)) * bsi.stride);
686  intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
687 #else
688  Xt_long temp_stride = xlldiv(xiimul(abs_stride_a, bsb.stride),
689  xi2l(eg.gcd)).quot;
690  bsi.stride = (Xt_int)temp_stride.lo;
691  /* minimal value that is reachable by both stripe starts plus some
692  * multiple of the respective stride */
693  Xt_long min_rep;
694  /* did the computation of bsi.stride remain in Xt_int range? */
695  bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
696  /* did the computation of min_rep remain in Xt_int range? */
697  bool min_rep_in_range;
698  {
699  Xt_long some_rep;
700  if (bsb.representative != bsa.representative) {
701  /* computation might generate huge intermediary values */
702  Xt_long temp_1
703  = xiimul(bsb.representative - bsa.representative, eg.u);
704  Xt_tword temp_2 = xlimul(temp_1, abs_stride_a);
705  min_rep_in_range
706  = (temp_2.hi
707  == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.mid))))
708  | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_int)temp_2.mid < 0)) < eg.gcd);
709  Xt_long t2 = (Xt_long){.hi = temp_2.mid, .lo = temp_2.lo };
710  Xt_ldiv temp_3;
711  if (eg.gcd == 1) {
712  temp_3.quot = t2;
713  temp_3.rem = (Xt_long){.hi = 0, .lo = 0 };
714  } else if (xlicmp_lt(t2, eg.gcd)) {
715  temp_3.quot = (Xt_long){ 0, 0 };
716  temp_3.rem = t2;
717  } else
718  temp_3 = xlldiv((Xt_long){.hi = temp_2.mid, .lo = temp_2.lo },
719  xi2l(eg.gcd));
720  some_rep = xliadd(temp_3.quot, bsa.representative);
721  } else {
722  min_rep_in_range = true;
723  some_rep = xi2l(bsa.representative);
724  }
725  /* compute minimal bsi representative >= bsi.min */
726  Xt_long abs_bsi_stride = xlabs(temp_stride),
727  r_diff = xilsub(bsi.min, some_rep);
728  Xt_ldiv steps = xlldiv(r_diff, abs_bsi_stride);
729  bool steps_in_range = xl_is_in_xt_int_range(steps.quot);
730  steps.quot = xlinc(steps.quot, (bool)((Xt_int)steps.rem.hi >= 0
731  && steps.rem.lo != 0));
732  if (steps_in_range)
733  min_rep = xlladd(some_rep,
734  xiimul((Xt_int)steps.quot.lo,
735  (Xt_int)abs_bsi_stride.lo));
736  else
737  min_rep = some_rep;
738  min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
739  bsi.representative = (Xt_int)min_rep.lo;
740  }
741  if (min_rep_in_range)
742  nstrides = (int)xlldiv(xilsub(bsi.max, min_rep), temp_stride).quot.lo + xlsign(temp_stride);
743  else
744  nstrides = bsb.representative == bsa.representative;
745  int even_divide
746  = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
747  & (bsi_stride_in_range || abs(nstrides) == 1));
748  /* requires two's complement integers */
749  strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
750  & xlicmp_le(min_rep, bsi.max) & xlicmp_ge(min_rep, bsi.min)) - 1);
751  Xt_int max_rep
752  = (Xt_int)(min_rep.lo
753  + (Xt_uint)((nstrides - xlsign(temp_stride)) * bsi.stride));
754  intersection.start = ((bsa.stride >= 0) ? (Xt_int)min_rep.lo : max_rep);
755 #endif
756  }
757  intersection.stride
758  = (Xt_int)(Xt_isign(bsa.stride) * XT_INT_ABS(bsi.stride));
759  intersection.nstrides
760  = (abs(nstrides) & strides_mask
761  & ~((int)stride_zero_mask_a & (int)stride_zero_mask_b))
762  | (stripe_b.nstrides & (int)stride_zero_mask_a & (int)stride_zero_mask_b);
763  INSTR_STOP(instr);
764  return intersection;
765 }
766 
767 // this routine only works for idxstripes where !stripes_do_overlap
768 static Xt_idxlist
770  Xt_idxstripes idxstripes_dst) {
771 
772  INSTR_DEF(instr,"idxstripes_compute_intersection")
773  INSTR_START(instr);
774 
775  struct Xt_stripe *restrict inter_stripes = NULL;
776  size_t num_inter_stripes = 0;
777  size_t inter_stripes_array_size = 0;
778 
779  const struct Xt_stripes_sort *restrict src_stripes_sort
780  = idxstripes_src->stripes_sort,
781  *restrict dst_stripes_sort = idxstripes_dst->stripes_sort;
782  const struct Xt_stripe *restrict stripes_src = idxstripes_src->stripes,
783  *restrict stripes_dst = idxstripes_dst->stripes;
784 
785  size_t i_src = 0, i_dst = 0;
786  size_t num_stripes_src = (size_t)idxstripes_src->num_stripes,
787  num_stripes_dst = (size_t)idxstripes_dst->num_stripes;
788 
789  while ((i_src < num_stripes_src) &
790  (i_dst < num_stripes_dst)) {
791 
792  while (i_src < num_stripes_src &&
793  src_stripes_sort[i_src].range.max
794  < dst_stripes_sort[i_dst].range.min) ++i_src;
795 
796  if ( i_src >= num_stripes_src ) break;
797 
798  while (i_dst < num_stripes_dst &&
799  dst_stripes_sort[i_dst].range.max
800  < src_stripes_sort[i_src].range.min) ++i_dst;
801 
802  if ( i_dst >= num_stripes_dst ) break;
803 
804  if ((src_stripes_sort[i_src].range.min
805  <= dst_stripes_sort[i_dst].range.max)
806  & (src_stripes_sort[i_src].range.max
807  >= dst_stripes_sort[i_dst].range.min)) {
808  ENSURE_ARRAY_SIZE(inter_stripes, inter_stripes_array_size,
809  num_inter_stripes+1);
810 
811  struct Xt_stripe intersection_stripe;
812  inter_stripes[num_inter_stripes] = intersection_stripe =
813  get_stripe_intersection(stripes_src[src_stripes_sort[i_src].position],
814  stripes_dst[dst_stripes_sort[i_dst].position]);
815  num_inter_stripes += intersection_stripe.nstrides > 0;
816  }
817 
818  if (dst_stripes_sort[i_dst].range.max
819  < src_stripes_sort[i_src].range.max)
820  i_dst++;
821  else
822  i_src++;
823  }
824 
825  if (num_inter_stripes) {
826  /* invert negative and merge consecutive stripes */
827  struct Xt_stripe prev_stripe = inter_stripes[0];
828  if (prev_stripe.stride < 0) {
829  prev_stripe.start
830  = (Xt_int)(prev_stripe.start
831  + (prev_stripe.stride * (Xt_int)(prev_stripe.nstrides - 1)));
832  prev_stripe.stride = (Xt_int)-prev_stripe.stride;
833  }
834  inter_stripes[0] = prev_stripe;
835  size_t j = 0;
836  for (size_t i = 1; i < num_inter_stripes; ++i) {
837  struct Xt_stripe stripe = inter_stripes[i];
838  if (stripe.stride < 0) {
839  stripe.start = (Xt_int)(stripe.start
840  + stripe.stride * (Xt_int)(stripe.nstrides - 1));
841  stripe.stride = (Xt_int)-stripe.stride;
842  }
843  if ((stripe.stride == prev_stripe.stride)
844  & (stripe.start
845  == (prev_stripe.start
846  + prev_stripe.stride * (Xt_int)prev_stripe.nstrides)))
847  {
848  prev_stripe.nstrides += stripe.nstrides;
849  inter_stripes[j].nstrides = prev_stripe.nstrides;
850  }
851  else
852  {
853  inter_stripes[++j] = stripe;
854  prev_stripe = stripe;
855  }
856  }
857  num_inter_stripes = j + 1;
858  }
859 
860  Xt_idxlist inter = xt_idxstripes_new(inter_stripes, (int)num_inter_stripes);
861 
862  free(inter_stripes);
863 
864  INSTR_STOP(instr);
865  return inter;
866 }
867 
870 {
871  // both lists are index stripes:
872  Xt_idxstripes idxstripes_src = (Xt_idxstripes)idxlist_src,
873  idxstripes_dst = (Xt_idxstripes)idxlist_dst;
874 
875  if ((idxstripes_src->flags | idxstripes_dst->flags)
877  return compute_intersection_fallback(idxstripes_src, idxstripes_dst);
878  } else
879  return idxstripes_compute_intersection(idxstripes_src, idxstripes_dst);
880 }
881 
882 static Xt_idxlist
884 
885  Xt_idxstripes stripes = (Xt_idxstripes)idxlist;
886 
887  return xt_idxstripes_new(stripes->stripes, stripes->num_stripes);
888 }
889 
890 static void
892  INSTR_DEF(instr,"idxstripes_get_indices")
893  INSTR_START(instr);
894 
896  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
897  const struct Xt_stripe *restrict stripes = idxstripes->stripes;
898  size_t num_stripes = (size_t)idxstripes->num_stripes;
899  for (size_t i = 0; i < num_stripes; ++i)
900  for (Xt_int j = 0; j < stripes[i].nstrides; ++j)
901  *(indices++) = (Xt_int)(stripes[i].start + j * stripes[i].stride);
902 
903  INSTR_STOP(instr);
904 }
905 
906 static Xt_int const*
908 
909  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
910 
911  if (idxstripes->index_array_cache) return idxstripes->index_array_cache;
912 
913  int num_indices = idxlist->num_indices;
914 
915  Xt_int *tmp_index_array
916  = xmalloc((size_t)num_indices * sizeof( *(idxstripes->index_array_cache) ) );
917 
918  idxstripes_get_indices(idxlist, tmp_index_array);
919 
920  idxstripes->index_array_cache = tmp_index_array;
921 
922  return idxstripes->index_array_cache;
923 }
924 
925 static void
927  int * num_stripes) {
928 
929  INSTR_DEF(instr,"idxstripes_get_index_stripes")
930  INSTR_START(instr);
931 
932  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
933  const struct Xt_stripe *restrict stripes_ = idxstripes->stripes;
934 
935  size_t num_temp_stripes, num_stripes_ = (size_t)idxstripes->num_stripes;
936 
937  num_temp_stripes = num_stripes_;
938  for (size_t i = 0; i < num_stripes_; ++i)
939  if (stripes_[i].stride != 1)
940  num_temp_stripes += (size_t)stripes_[i].nstrides - 1U;
941  struct Xt_stripe *restrict temp_stripes = *stripes
942  = xrealloc(*stripes, num_temp_stripes * sizeof(*temp_stripes));
943  *num_stripes = (int)num_temp_stripes;
944  num_temp_stripes = 0;
945  for (size_t i = 0; i < num_stripes_; ++i)
946  if (stripes_[i].stride == 1)
947  temp_stripes[num_temp_stripes++] = stripes_[i];
948  else
949  for (size_t j = 0; j < (size_t)stripes_[i].nstrides; ++j) {
950  temp_stripes[num_temp_stripes].start
951  = (Xt_int)(stripes_[i].start + (Xt_int)j * stripes_[i].stride);
952  temp_stripes[num_temp_stripes].nstrides = 1;
953  temp_stripes[num_temp_stripes].stride = 1;
954  ++num_temp_stripes;
955  }
956 
957  INSTR_STOP(instr);
958 }
959 
960 static int
962  Xt_int * index) {
963 
964  INSTR_DEF(instr,"idxstripes_get_index_at_position")
965  INSTR_START(instr);
966 
967  int retval;
968 
969  if (position >= 0 && position < idxlist->num_indices) {
970  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
971  const struct Xt_stripe *restrict stripes = idxstripes->stripes;
972  size_t num_stripes = (size_t)idxstripes->num_stripes;
973  size_t i = 0;
974  while (i < num_stripes && position >= stripes[i].nstrides) {
975  position-= stripes[i].nstrides;
976  ++i;
977  }
978  *index = (Xt_int)(stripes[i].start + position * stripes[i].stride);
979  retval = 0;
980  } else {
981  retval = 1;
982  }
983 
984  INSTR_STOP(instr);
985  return retval;
986 }
987 
988 
989 static int
990 idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions,
991  int num_pos, Xt_int *index,
992  Xt_int undef_idx) {
993 
994  INSTR_DEF(instr,"idxstripes_get_indices_at_positions")
995  INSTR_START(instr);
996 
997  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
998  const struct Xt_stripe *stripes = idxstripes->stripes;
999 
1000  int max_pos = idxlist->num_indices;
1001  int sub_pos = 0;
1002  /* initial guess which stripe to inspect for position */
1003  int istripe = 0;
1004  /* position range corresponding to stripe indexed by istripe */
1005  int stripe_start_pos = 0;
1006  int undef_count = 0;
1007 
1008  for (int ipos = 0; ipos < num_pos; ipos++) {
1009 
1010  int seek_pos = positions[ipos];
1011 
1012  if (seek_pos >= 0 && seek_pos < max_pos) {
1013  while (seek_pos < stripe_start_pos) {
1014  istripe--;
1015 #ifndef NDEBUG
1016  if (istripe < 0)
1017  die("idxstripes_get_indices_at_positions: internal error:"
1018  " crossed 0-boundary");
1019 #endif
1020  stripe_start_pos -= (int)stripes[istripe].nstrides;
1021  }
1022 
1023  while (seek_pos > stripe_start_pos + stripes[istripe].nstrides - 1) {
1024  stripe_start_pos += (int)stripes[istripe].nstrides;
1025  istripe++;
1026 #ifndef NDEBUG
1027  if (istripe >= idxstripes->num_stripes)
1028  die("idxstripes_get_indices_at_positions: internal error:"
1029  " crossed upper boundary");
1030 #endif
1031  }
1032 
1033  sub_pos = seek_pos - stripe_start_pos;
1034  index[ipos]
1035  = (Xt_int)(stripes[istripe].start + sub_pos * stripes[istripe].stride);
1036  } else {
1037  index[ipos] = undef_idx;
1038  undef_count++;
1039  }
1040  }
1041 
1042  INSTR_STOP(instr);
1043 
1044  return undef_count;
1045 }
1046 
1047 static int
1049  int * position) {
1050 
1051  return idxstripes_get_position_of_index_off(idxlist, index, position, 0);
1052 }
1053 
1054 static int
1056  int * position, int offset) {
1057 
1058  INSTR_DEF(instr,"idxstripes_get_position_of_index_off")
1059  INSTR_START(instr);
1060 
1061  int retval = 1;
1062 
1063  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1064  const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1065  size_t num_stripes = (size_t)idxstripes->num_stripes, i = 0;
1066  Xt_int position_offset = 0;
1067 
1068  while (i < num_stripes && position_offset + stripes[i].nstrides <= offset)
1069  position_offset = (Xt_int)(position_offset + stripes[i++].nstrides);
1070 
1071  for (; i < num_stripes;
1072  position_offset = (Xt_int)(position_offset + stripes[i++].nstrides)) {
1073 
1074  if ((stripes[i].stride > 0 && index < stripes[i].start)
1075  || (stripes[i].stride < 0 && index > stripes[i].start))
1076  continue;
1077 
1078  Xt_int rel_start = (Xt_int)(index - stripes[i].start);
1079 
1080  if (rel_start % stripes[i].stride) continue;
1081 
1082  if (rel_start / stripes[i].stride >= stripes[i].nstrides)
1083  continue;
1084 
1085  *position = (int)(rel_start/stripes[i].stride + position_offset);
1086 
1087  retval = 0;
1088  goto fun_exit;
1089  }
1090 
1091  *position = -1;
1092 
1093  fun_exit: ;
1094  INSTR_STOP(instr);
1095  return retval;
1096 }
1097 
1098 static inline void
1099 append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
1100 {
1101  size_t num_pos_exts_ = result->num_pos_ext,
1102  size_pos_exts_ = result->size_pos_ext;
1103  struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1104  if (!xt_can_merge_pos_ext(pos_exts_[num_pos_exts_ - 1], pos_ext))
1105  {
1106  if (num_pos_exts_ + 1 == size_pos_exts_)
1107  {
1108  size_pos_exts_ += 16;
1109  /* offsetting by 1 necessary to keep the terminator in place */
1110  result->pos_ext = pos_exts_ = (struct Xt_pos_ext *)
1111  xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) * sizeof (*pos_exts_)) + 1;
1112  result->size_pos_ext = size_pos_exts_;
1113  }
1114  pos_exts_[num_pos_exts_] = pos_ext;
1115  result->num_pos_ext = num_pos_exts_ + 1;
1116  }
1117  else {
1118  /* merge new ext with previous */
1119  pos_exts_[num_pos_exts_ - 1].size
1120  = isign(pos_ext.start - pos_exts_[num_pos_exts_ - 1].start)
1121  * (abs(pos_exts_[num_pos_exts_ - 1].size) + abs(pos_ext.size));
1122  }
1123 }
1124 
1126  size_t num_stripes;
1127  const struct Xt_stripe *stripes;
1131 };
1132 
1133 static inline void
1135  Xt_idxstripes idxstripes) {
1136  const struct Xt_stripe *restrict stripes;
1137  db->stripes = stripes = idxstripes->stripes;
1138  size_t num_db_stripes = (size_t)idxstripes->num_stripes;
1139  /* using num_db_stripes + 1 ensures re-aligning is always possible */
1140  int *restrict db_stripes_nstrides_psum
1141  = xmalloc((num_db_stripes + 1)
1142  * sizeof (db->stripes_nstrides_psum[0]));
1143  db_stripes_nstrides_psum[0] = 0;
1144  for (size_t j = 0; j < num_db_stripes; ++j) {
1145  db_stripes_nstrides_psum[j + 1]
1146  = db_stripes_nstrides_psum[j] + stripes[j].nstrides;
1147  }
1148  db->stripes_sort = idxstripes->stripes_sort;
1149  db->num_stripes = num_db_stripes;
1150  db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1151  db->stripes_do_overlap = idxstripes->flags & stripes_do_overlap_mask;
1152 }
1153 
1154 static inline void
1156  free(db->stripes_nstrides_psum);
1157 }
1158 
1159 struct int_vec
1160 {
1161  size_t size, num;
1162  int *vec;
1163 };
1164 
1165 static inline size_t
1167  const struct Xt_stripes_sort a[n],
1168  Xt_int min_key)
1169 {
1170  size_t left = 0, right = n - 1; /* avoid overflow in `(left + right)/2' */
1171  if ((a && n > 0)) ; else return n; /* invalid input or empty array */
1172  while (left < right)
1173  {
1174  /* invariant: a[left].range.min <= min_key <= a[right].range.min
1175  * or not in a */
1176  /*NOTE: *intentionally* truncate for odd sum */
1177  size_t m = (left + right + 1) / 2;
1178  if (a[m].range.min > min_key)
1179  right = m - 1; /* a[m].range.min <= min_key < a[right].range.min
1180  * or min_key not in a */
1181  else
1182  left = m;/* a[left].range.min <= min_key <= a[m].range.min
1183  * or min_key not in a */
1184  }
1185  /* assert(left == right) */
1186  return a[right].range.min <= min_key ? right : n;
1187 }
1188 
1189 static void
1191  const struct Xt_stripes_lookup *restrict db,
1192  struct int_vec *candidates)
1193 {
1194  struct Xt_stripe_minmax query_minmax = xt_stripe2minmax(query);
1195  size_t num_db_stripes = db->num_stripes;
1196  const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1197  size_t start_pos
1198  = bsearch_stripes_sort(num_db_stripes, db_stripes_sort, query_minmax.max);
1199  if (start_pos != num_db_stripes) {
1200  assert(db_stripes_sort[start_pos].range.min <= query_minmax.max);
1201  size_t end_pos = start_pos;
1202  while (end_pos < num_db_stripes
1203  && db_stripes_sort[end_pos].range.min <= query_minmax.max)
1204  ++end_pos;
1205  /* find all overlaps (which is more complicated if
1206  * non-overlapping isn't guaranteed) */
1207  size_t num_candidates;
1208  if (!db->stripes_do_overlap)
1209  {
1210  while (start_pos > 0
1211  && (db_stripes_sort[start_pos - 1].range.max >= query_minmax.min))
1212  --start_pos;
1213  num_candidates = end_pos - start_pos;
1214  if (candidates->size < num_candidates) {
1215  candidates->vec = xrealloc(candidates->vec,
1216  num_candidates
1217  * sizeof (candidates->vec[0]));
1218  candidates->size = num_candidates;
1219  }
1220  candidates->num = num_candidates;
1221  int *restrict candidates_vec = candidates->vec;
1222  for (size_t i = 0; i < num_candidates; ++i)
1223  candidates_vec[i] = db_stripes_sort[start_pos + i].position;
1224  }
1225  else
1226  {
1227  num_candidates = 0;
1228  size_t min_candidate = start_pos;
1229  for (size_t i = end_pos - 1; i != SIZE_MAX; --i)
1230  {
1231  size_t predicate
1232  = ((db_stripes_sort[i].range.min <= query_minmax.max)
1233  & (db_stripes_sort[i].range.max >= query_minmax.min));
1234  num_candidates += predicate;
1235  size_t predicate_mask = predicate - 1;
1236  min_candidate = (min_candidate & predicate_mask)
1237  | (i & ~predicate_mask);
1238  }
1239  if (candidates->size < num_candidates + 1)
1240  {
1241  candidates->vec = xrealloc(candidates->vec,
1242  (num_candidates + 1)
1243  * sizeof (candidates[0]));
1244  candidates->size = num_candidates + 1;
1245  }
1246  candidates->num = num_candidates;
1247  int *restrict candidates_vec = candidates->vec;
1248  size_t j = 0;
1249  for (size_t i = min_candidate; i < end_pos; ++i) {
1250  candidates_vec[j] = db_stripes_sort[i].position;
1251  j += ((size_t)(db_stripes_sort[i].range.min <= query_minmax.max)
1252  & (size_t)(db_stripes_sort[i].range.max >= query_minmax.min));
1253  }
1254  assert(j == num_candidates);
1255  }
1256  xt_sort_int(candidates->vec, num_candidates);
1257  } else
1258  candidates->num = 0;
1259 }
1260 
1261 static struct Xt_idxstripes_ *
1263  const struct Xt_stripe *restrict stripes)
1264 {
1265  size_t expansion = 0;
1266  for (size_t i = 0; i < num_stripes; ++i) {
1267  expansion += stripes[i].stride == 0 ? (size_t)(stripes[i].nstrides - 1) : 0;
1268  }
1269  struct Xt_stripe *restrict expanded_stripes
1270  = xmalloc((num_stripes + expansion) * sizeof (expanded_stripes[0]));
1271  size_t j = 0;
1272  for (size_t i = 0; i < num_stripes; ++i) {
1273  struct Xt_stripe stripe = stripes[i];
1274  if (stripe.stride == 0) {
1275  for (size_t k = 0; k < (size_t)stripe.nstrides; ++k)
1276  expanded_stripes[j + k] = (struct Xt_stripe){ .start = stripe.start,
1277  .stride = 1,
1278  .nstrides = 1 };
1279  j += (size_t)stripe.nstrides;
1280  } else {
1281  expanded_stripes[j] = stripe;
1282  ++j;
1283  }
1284  }
1285  return (struct Xt_idxstripes_ *)
1286  xt_idxstripes_prealloc_new(expanded_stripes,
1287  (int)(num_stripes + expansion));
1288 }
1289 
1290 
1291 static size_t
1293  struct Xt_stripe query,
1294  const struct Xt_stripes_lookup *restrict db,
1295  struct Xt_pos_ext_vec *restrict result,
1296  struct Xt_pos_ext_vec *restrict cover,
1297  bool single_match_only,
1298  size_t num_candidates,
1299  int *restrict candidates);
1300 
1301 int
1303  Xt_idxlist idxlist,
1304  int num_stripes,
1305  const struct Xt_stripe stripes[num_stripes],
1306  int *num_ext,
1307  struct Xt_pos_ext **pos_exts,
1308  int single_match_only)
1309 {
1310  size_t unmatched = 0;
1311  struct Xt_pos_ext_vec result;
1312  result.num_pos_ext = 0;
1313  struct Xt_idxstripes_ *restrict idxstripes = (struct Xt_idxstripes_ *)idxlist;
1314  if (num_stripes > 0)
1315  {
1316  if (idxstripes->flags & stripes_some_have_zero_stride_mask)
1317  idxstripes = expand_zero_stripes((size_t)idxstripes->num_stripes,
1318  idxstripes->stripes);
1319  result.size_pos_ext = (size_t)MAX(MIN(idxstripes->num_stripes,
1320  num_stripes), 8);
1321  result.pos_ext = xmalloc((result.size_pos_ext + 1)
1322  * sizeof (*result.pos_ext));
1323  /* put non-concatenable *terminator* at offset -1 */
1324  result.pos_ext[0] = (struct Xt_pos_ext){.start = INT_MIN, .size = 0 };
1325  ++result.pos_ext;
1326  struct Xt_pos_ext_vec cover;
1327  xt_cover_start(&cover, result.size_pos_ext);
1328  struct Xt_stripes_lookup stripes_db;
1329  create_stripes_lookup(&stripes_db, idxstripes);
1330  struct int_vec candidates = { .size = 0, .vec = NULL };
1331  for (size_t i = 0; i < (size_t)num_stripes; ++i) {
1332  struct Xt_stripe query = stripes[i];
1333  int j = query.nstrides;
1334  query.nstrides = ((query.stride != 0) | (query.nstrides == 0))
1335  ? query.nstrides : 1;
1336  query.stride = (Xt_int)((query.stride != 0 && query.nstrides != 1)
1337  ? query.stride : (Xt_int)1);
1338  find_candidates(query, &stripes_db, &candidates);
1339  do {
1341  query, &stripes_db, &result, &cover, single_match_only != 0,
1342  candidates.num, candidates.vec);
1343  } while ((stripes[i].stride == 0) & (--j > 0));
1344  }
1345  free(candidates.vec);
1346  --(result.pos_ext);
1347  memmove(result.pos_ext, result.pos_ext + 1,
1348  sizeof (*result.pos_ext) * result.num_pos_ext);
1349  *pos_exts = xrealloc(result.pos_ext,
1350  sizeof (*result.pos_ext) * result.num_pos_ext);
1351  destroy_stripes_lookup(&stripes_db);
1352  xt_cover_finish(&cover);
1353  if ((struct Xt_idxstripes_ *)idxlist != idxstripes) {
1354  free(idxstripes->stripes);
1355  xt_idxlist_delete((Xt_idxlist)idxstripes);
1356  }
1357  }
1358  *num_ext = (int)result.num_pos_ext;
1359  return (int)unmatched;
1360 }
1361 
1362 static size_t
1364  struct Xt_pos_ext pos_ext2add,
1365  const struct Xt_stripes_lookup *restrict db,
1366  struct Xt_pos_ext_vec *restrict result,
1367  struct Xt_pos_ext_vec *restrict cover,
1368  size_t num_candidates,
1369  int *restrict candidates);
1370 
1372 {
1373  size_t unmatched;
1374  struct Xt_stripe query_tail;
1375 };
1376 
1377 static struct unmatched_tail
1379  struct Xt_stripe query,
1380  const struct Xt_stripes_lookup *restrict stripes_lookup,
1381  struct Xt_pos_ext_vec *restrict result,
1382  struct Xt_pos_ext_vec *restrict cover,
1383  bool single_match_only,
1384  size_t num_candidates,
1385  int *restrict candidates);
1386 
1387 static size_t
1389  struct Xt_pos_ext pos_ext2add,
1390  const struct Xt_stripes_lookup *stripes_lookup,
1391  struct Xt_pos_ext_vec *restrict result,
1392  struct Xt_pos_ext_vec *restrict cover,
1393  bool single_match_only,
1394  size_t num_candidates,
1395  int *restrict candidates)
1396 {
1397  size_t unmatched = 0;
1398  if (single_match_only)
1399  unmatched += conditional_pos_ext_insert(
1400  query, pos_ext2add, stripes_lookup, result, cover,
1401  num_candidates, candidates);
1402  else
1403  append_ext(pos_ext2add, result);
1404  return unmatched;
1405 }
1406 
1407 
1408 
1409 size_t
1411  struct Xt_stripe query,
1412  const struct Xt_stripes_lookup *restrict db,
1413  struct Xt_pos_ext_vec *restrict result,
1414  struct Xt_pos_ext_vec *restrict cover,
1415  bool single_match_only,
1416  size_t num_candidates,
1417  int *restrict candidates)
1418 {
1419  size_t unmatched = 0;
1420  struct Xt_stripe_minmax query_minmax = xt_stripe2minmax(query);
1421  const struct Xt_stripe *restrict db_stripes = db->stripes;
1422  const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1423  const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1424  for (size_t j = 0; j < num_candidates; ++j) {
1425  size_t unsort_pos = (size_t)candidates[j];
1426  size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].inv_position;
1427  if ((query_minmax.min <= db_stripes_sort[sort_pos].range.max)
1428  & (query_minmax.max >= db_stripes_sort[sort_pos].range. min))
1429  ;
1430  else
1431  continue;
1432  struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1433  Xt_int stride = query.stride;
1434  /* determine if db_stripe and query can be easily aligned */
1435  if ((stride > 0)
1436  & (stride == db_stripe.stride)
1437  & ((query.start - db_stripe.start) % stride == 0)) {
1438  /* divide query into skipped, matching and left-over parts,
1439  * where skipped and left-over are non-matching */
1440  Xt_int overlap_start = MAX(query.start, db_stripe.start);
1441  /* handle skipped query part */
1442  int skipLen = (int)((overlap_start - query.start) / stride);
1443  if (skipLen)
1444  {
1445  struct Xt_stripe query_head = {
1446  .start = query.start,
1447  .stride = skipLen > 1 ? stride : (Xt_int)1,
1448  .nstrides = skipLen };
1449  unmatched
1451  query_head, db, result, cover, single_match_only,
1452  num_candidates - j - 1, candidates + j + 1);
1453  query.start = (Xt_int)(query.start + stride * (Xt_int)skipLen);
1454  query.nstrides -= skipLen;
1455  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1456  }
1457  int db_stripe_skip
1458  = (int)((overlap_start - db_stripe.start) / stride);
1459  int overlap_nstrides
1460  = imin(query.nstrides, db_stripe.nstrides - db_stripe_skip);
1461 
1462  unmatched +=
1464  (struct Xt_stripe){ .start = query.start,
1465  .stride = overlap_nstrides > 1 ? query.stride : (Xt_int)1,
1466  .nstrides = overlap_nstrides},
1467  (struct Xt_pos_ext){ .start
1468  = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1469  .size = overlap_nstrides
1470  }, db, result, cover, single_match_only, num_candidates - j - 1,
1471  candidates + j + 1);
1472 
1473  if (!(query.nstrides -= overlap_nstrides))
1474  goto search_finished;
1475  else {
1476  query.start = (Xt_int)(overlap_start + stride * overlap_nstrides);
1477  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1478  query_minmax = xt_stripe2minmax(query);
1479  continue;
1480  }
1481  }
1482  else
1483  {
1484  /* Handle complex overlap */
1485  struct unmatched_tail search_result =
1487  query, db, result, cover, single_match_only,
1488  num_candidates - j, candidates + j);
1489  unmatched += search_result.unmatched;
1490  if (!search_result.query_tail.nstrides)
1491  goto search_finished;
1492  else {
1493  query = search_result.query_tail;
1494  query_minmax = xt_stripe2minmax(query);
1495  }
1496  }
1497  }
1498  unmatched += (size_t)query.nstrides;
1499  /* query wasn't found, add all indices to unmatched */
1500 search_finished:
1501  return unmatched;
1502 }
1503 
1504 /* todo: add indices into pos_exts which are sorted by end/first of ranges */
1505 static size_t
1507  struct Xt_pos_ext pos_ext2add,
1508  const struct Xt_stripes_lookup *restrict db,
1509  struct Xt_pos_ext_vec *restrict result,
1510  struct Xt_pos_ext_vec *restrict cover,
1511  size_t num_candidates,
1512  int *restrict candidates)
1513 {
1514  /* single_match_only is true => never re-match positions */
1515  size_t unmatched = 0;
1516  Xt_int stride = query.stride;
1517  if (pos_ext2add.size == -1)
1518  pos_ext2add.size = 1;
1519  int querySizeMaskNeg = isign_mask(pos_ext2add.size);
1520 tail_search:
1521  ;
1522  int pos_ext2add_s = pos_ext2add.start
1523  + (querySizeMaskNeg & (pos_ext2add.size + 1)),
1524  pos_ext2add_e = pos_ext2add.start
1525  + (~querySizeMaskNeg & (pos_ext2add.size - 1));
1526  struct Xt_pos_range query_range
1527  = { .start = pos_ext2add_s, .end = pos_ext2add_e };
1528  /* does overlap exist? */
1529  size_t overlap_pos =
1530  xt_cover_insert_or_overlap(cover, query_range, true, 0);
1531  if (overlap_pos == SIZE_MAX) {
1532  /* remaining extent does not overlap any existing one */
1533  append_ext(pos_ext2add, result);
1534  } else {
1535  struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
1536  int dbSizeMaskNeg = isign_mask(pos_exts_[overlap_pos].size),
1537  db_s = pos_exts_[overlap_pos].start
1538  + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
1539  db_e = pos_exts_[overlap_pos].start
1540  + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].size - 1));
1541  /* determine length of overlap parts */
1542  int lowQuerySkip = db_s - pos_ext2add_s;
1543  int lowDbSkip = -lowQuerySkip;
1544  lowQuerySkip = (int)((unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
1545  lowDbSkip = (int)((unsigned)(lowDbSkip + abs(lowDbSkip))/2);
1546  int overlapLen = MIN(db_e - db_s - lowDbSkip + 1,
1547  abs(pos_ext2add.size) - lowQuerySkip);
1548  int highQuerySkip = abs(pos_ext2add.size) - lowQuerySkip - overlapLen;
1549  /* then adjust lengths to direction of overlap (from
1550  * perspective of pos_ext2add */
1551  int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
1552  | (querySizeMaskNeg & -highQuerySkip),
1553  queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
1554  | (~querySizeMaskNeg & highQuerySkip);
1555  if (querySkipLen)
1556  {
1557  int absQuerySkipLen = abs(querySkipLen);
1558  struct Xt_stripe query_skip = {
1559  .start = query.start,
1560  .stride = absQuerySkipLen > 1 ? stride : (Xt_int)1,
1561  .nstrides = absQuerySkipLen,
1562  };
1563  struct Xt_pos_ext pos_ext2add_skip = {
1564  .start = pos_ext2add.start,
1565  .size = querySkipLen
1566  };
1567  unmatched
1569  query_skip, pos_ext2add_skip, db,
1570  result, cover, num_candidates, candidates);
1571  pos_exts_ = result->pos_ext;
1572  query.start = (Xt_int)(query.start
1573  + stride * (Xt_int)absQuerySkipLen);
1574  query.nstrides -= absQuerySkipLen;
1575  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1576  pos_ext2add.start += querySkipLen;
1577  pos_ext2add.size -= querySkipLen;
1578  }
1579  /* head part of (remaining) query matches part of already inserted index
1580  * range */
1581  struct Xt_stripe query_head = {
1582  .start = query.start,
1583  .stride = abs(overlapLen) > 1 ? stride : (Xt_int)1,
1584  .nstrides = abs(overlapLen) };
1585  /* restart search for overlapping part within following ranges */
1586  unmatched
1588  query_head, db, result, cover, true,
1589  num_candidates, candidates);
1590  pos_exts_ = result->pos_ext;
1591  if (queryTailLen) {
1592  /* shorten query accordingly */
1593  query.nstrides -= abs(overlapLen);
1594  query.start = (Xt_int)(query.start + stride * (Xt_int)abs(overlapLen));
1595  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1596  int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
1597  | (querySizeMaskNeg & -overlapLen);
1598  pos_ext2add.start += directedOverlapLen;
1599  pos_ext2add.size -= directedOverlapLen;
1600  goto tail_search;
1601  }
1602  /* whole range handled, return */
1603  }
1604  return unmatched;
1605 }
1606 
1607 static Xt_int
1609  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1610  return idxstripes->min_index;
1611 }
1612 
1613 static Xt_int
1615  Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1616  return idxstripes->max_index;
1617 }
1618 
1619 /* unfortunately, Cray cc 8.[56].x miscompile the following function,
1620  * hence optimizations must be disabled. Feel free to dig deeper into
1621  * the problem, but it's fixed in 8.7. */
1622 #if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
1623 #pragma _CRI noopt
1624 #endif
1625 static struct unmatched_tail
1627  struct Xt_stripe query,
1628  const struct Xt_stripes_lookup *restrict db,
1629  struct Xt_pos_ext_vec *restrict result,
1630  struct Xt_pos_ext_vec *restrict cover,
1631  bool single_match_only,
1632  size_t num_candidates,
1633  int *restrict candidates)
1634 {
1635  size_t unmatched = 0;
1636  const struct Xt_stripe *restrict db_stripes = db->stripes;
1637  size_t db_stripe_pos = (size_t)candidates[0];
1638  struct Xt_stripe overlap = get_stripe_intersection(query,
1639  db_stripes[db_stripe_pos]);
1640  if (overlap.nstrides == 0)
1641  return (struct unmatched_tail){ .unmatched = 0, .query_tail = query};
1642 
1643  int skipped = (int)((overlap.start - query.start)/query.stride);
1644  if (skipped)
1645  {
1647  (struct Xt_stripe){ .start = query.start,
1648  .stride = skipped > 1 ? query.stride : (Xt_int)1,
1649  .nstrides = skipped}, db, result, cover,
1650  single_match_only, num_candidates - 1, candidates + 1);
1651  query.start = (Xt_int)(query.start + skipped * query.stride);
1652  query.nstrides -= skipped;
1653  query.stride = query.nstrides > 1 ? query.stride : 1;
1654  }
1655  /* Since overlap.nstrides > 0, overlap.start always matches, but
1656  * depending on the stride the remaining overlapping indices might or might
1657  * not be consecutive in the query, in the latter case intervening
1658  * parts need to be searched for too.
1659  * Stripes of length 1 are naturally consecutive.
1660  */
1661  int db_stripe_skip
1662  = (int)((overlap.start - db_stripes[db_stripe_pos].start)
1663  / db_stripes[db_stripe_pos].stride);
1664  int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
1665  if (((overlap.stride == query.stride)
1666  & (overlap.stride == db_stripes[db_stripe_pos].stride))
1667  | (overlap.nstrides == 1))
1668  {
1669  unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
1670  .start = db_pos,
1671  .size = overlap.nstrides },
1672  db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
1673  query.nstrides -= overlap.nstrides;
1674  query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
1675  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1676  }
1677  else if ((overlap.stride == query.stride)
1678  & (overlap.stride == -db_stripes[db_stripe_pos].stride))
1679  {
1680  /* all parts of the overlap can be used directly,
1681  but are inversely sorted in db_stripe */
1682  unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
1683  .start = db_pos, .size = -overlap.nstrides },
1684  db, result, cover, single_match_only,
1685  num_candidates - 1, candidates + 1);
1686  query.nstrides -= overlap.nstrides;
1687  query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
1688  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1689  }
1690  else if (overlap.stride == query.stride)
1691  {
1692  /* all parts of the overlap can be used but are non-consecutive
1693  * in db_stripe */
1694  int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
1695  /* todo: try to keep (prefix of) stripe together if the
1696  * corresponding positions cannot be inserted anyway */
1697  for (int i = 0; i < overlap.nstrides; ++i, db_pos += db_step)
1698  unmatched += pos_ext_insert(
1699  (struct Xt_stripe){ (Xt_int)(overlap.start + i*overlap.stride), 1, 1 },
1700  (struct Xt_pos_ext){ .start = db_pos, .size = 1 },
1701  db, result, cover, single_match_only,
1702  num_candidates - 1, candidates + 1);
1703  query.nstrides -= overlap.nstrides;
1704  query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
1705  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1706  }
1707  else /* overlap.stride != query.stride => overlap.stride > query.stride */
1708  {
1709  /*
1710  * query.start = (Xt_int)(query.start
1711  * + overlap.stride * overlap.nstrides);
1712  */
1713  int stride_step = (int)(overlap.stride / query.stride);
1714  int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
1715  do {
1716  struct Xt_stripe consecutive_overlap = { .start = query.start,
1717  .stride = 1,
1718  .nstrides = 1 },
1719  intervening = { .start = (Xt_int)(query.start + query.stride),
1720  .stride = query.stride,
1721  .nstrides = MIN(query.nstrides - 1, stride_step - 1) };
1722  /* split off start index, then handle intervening */
1723  unmatched +=
1724  pos_ext_insert(consecutive_overlap, (struct Xt_pos_ext){
1725  .start = db_pos, .size = 1 },
1726  db, result, cover, single_match_only,
1727  num_candidates - 1, candidates + 1);
1728  db_pos += db_step;
1729  if (--query.nstrides > 0) {
1730  unmatched +=
1732  intervening, db, result, cover, single_match_only,
1733  num_candidates - 1, candidates + 1);
1734  query.nstrides -= intervening.nstrides;
1735  }
1736  query.start = (Xt_int)(query.start + query.stride * stride_step);
1737  query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1738  overlap.start = (Xt_int)(overlap.start + overlap.stride);
1739  } while (--overlap.nstrides);
1740  }
1741  return (struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
1742 }
1743 
1744 /*
1745  * Local Variables:
1746  * c-basic-offset: 2
1747  * coding: utf-8
1748  * indent-tabs-mode: nil
1749  * show-trailing-whitespace: t
1750  * require-trailing-newline: t
1751  * End:
1752  */
int MPI_Comm
Definition: core.h:64
#define die(msg)
Definition: core.h:131
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
#define INSTR_STOP(T)
Definition: instr.h:69
#define INSTR_DEF(T, S)
Definition: instr.h:66
#define INSTR_START(T)
Definition: instr.h:68
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition: ppm_xfuncs.h:71
#define xmalloc(size)
Definition: ppm_xfuncs.h:70
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
Definition: xt_cover.h:60
size_t num_pos_ext
Definition: xt_cover.h:59
size_t size_pos_ext
Definition: xt_cover.h:59
int size
Definition: xt_core.h:93
int start
Definition: xt_core.h:93
Xt_int stride
Definition: xt_stripe.h:56
int nstrides
Definition: xt_stripe.h:57
Xt_int start
Definition: xt_stripe.h:55
const struct Xt_stripe * stripes
const struct Xt_stripes_sort * stripes_sort
struct Xt_stripe_minmax range
size_t size
size_t num
struct Xt_stripe query_tail
void(* delete)(Xt_idxlist)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
@ xt_int_bits
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static int isign(int x)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
base definitions header file
int xt_initialized(void)
Definition: xt_init.c:107
#define Xt_int_dt
Definition: xt_core.h:69
XT_INT Xt_int
Definition: xt_core.h:68
unsigned XT_INT Xt_uint
Definition: xt_core.h:70
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
Definition: xt_cover.c:69
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, bool forward, size_t search_start_pos)
Definition: xt_cover.c:148
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Definition: xt_cover.c:60
Xt_idxlist xt_idxempty_new(void)
Definition: xt_idxempty.c:165
index list declaration
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Definition: xt_idxlist.c:118
Xt_idxlist xt_idxlist_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
void xt_idxlist_delete(Xt_idxlist idxlist)
Definition: xt_idxlist.c:74
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ STRIPES
@ stripes_some_have_zero_stride_mask
@ stripes_do_overlap_bit
@ stripes_do_overlap_mask
@ stripes_some_have_zero_stride_bit
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int compare_xtstripes(const void *a_, const void *b_)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe *stripes, int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only)
static void idxstripes_delete(Xt_idxlist data)
void xt_idxstripes_initialize(void)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
#define MIN(a, b)
Definition: xt_idxstripes.c:76
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static struct extended_gcd extended_gcd(Xt_int a, Xt_int b)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
#define MAX(a, b)
Definition: xt_idxstripes.c:77
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe *stripes, int num_stripes)
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition: xt_mpi.h:68
void(* xt_sort_int)(int *a, size_t n)
Definition: xt_sort.c:53
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
Definition: xt_stripe.c:122
static struct Xt_stripe_minmax xt_stripe2minmax(struct Xt_stripe stripe)