Yet Another eXchange Tool  0.9.0
xt_idxvec.c
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jörg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifdef HAVE_CONFIG_H
47 #include <config.h>
48 #endif
49 
50 #include <assert.h>
51 #include <limits.h>
52 #include <stdbool.h>
53 #include <stdlib.h>
54 #include <stdio.h>
55 #include <string.h>
56 
57 #include "xt/xt_core.h"
58 #include "xt/xt_idxlist.h"
59 #include "xt_idxlist_internal.h"
60 #include "xt/xt_idxempty.h"
61 #include "xt/xt_idxvec.h"
62 #include "xt_idxvec_internal.h"
63 #include "xt/xt_idxstripes.h"
64 #include "xt/xt_mpi.h"
65 #include "xt_idxlist_unpack.h"
66 #include "core/ppm_xfuncs.h"
67 #include "core/core.h"
68 #include "xt_stripe_util.h"
69 #include "xt/quicksort.h"
70 #include "instr.h"
71 
72 #define MIN(a,b) (((a)<(b))?(a):(b))
73 
74 static void
76 
77 static size_t
79 
80 static void
81 idxvec_pack(Xt_idxlist data, void *buffer, int buffer_size,
82  int *position, MPI_Comm comm);
83 
84 static Xt_idxlist
85 idxvec_copy(Xt_idxlist idxlist);
86 
87 static void
88 idxvec_get_indices(Xt_idxlist idxlist, Xt_int *indices);
89 
90 static Xt_int const*
92 
93 static void
94 idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe ** stripes,
95  int * num_stripes);
96 
97 static int
98 idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int * index);
99 
100 static int
101 idxvec_get_indices_at_positions(Xt_idxlist idxlist, const int *positions,
102  int num, Xt_int *index, Xt_int undef_idx);
103 
104 static int
105 idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int * position);
106 
107 static int
109  int * position, int offset);
110 
111 static int
112 idxvec_get_positions_of_indices(Xt_idxlist idxlist, const Xt_int *indices,
113  int num_indices, int *positions,
114  int single_match_only);
115 
116 static Xt_int
118 
119 static Xt_int
121 
122 static const struct xt_idxlist_vtable idxvec_vtable = {
124  .get_pack_size = idxvec_get_pack_size,
125  .pack = idxvec_pack,
126  .copy = idxvec_copy,
127  .get_indices = idxvec_get_indices,
128  .get_indices_const = idxvec_get_indices_const,
129  .get_index_stripes = idxvec_get_index_stripes,
130  .get_index_at_position = idxvec_get_index_at_position,
131  .get_indices_at_positions = idxvec_get_indices_at_positions,
132  .get_position_of_index = idxvec_get_position_of_index,
133  .get_positions_of_indices = idxvec_get_positions_of_indices,
134  .get_position_of_index_off = idxvec_get_position_of_index_off,
135  .get_positions_of_indices_off = NULL,
136  .get_min_index = idxvec_get_min_index,
137  .get_max_index = idxvec_get_max_index,
138  .get_bounding_box = NULL,
139  .idxlist_pack_code = VECTOR,
140 };
141 
142 typedef struct Xt_idxvec_ *Xt_idxvec;
143 
144 // index vector data structure
145 struct Xt_idxvec_ {
146 
147  struct Xt_idxlist_ parent;
148 
149  const Xt_int *vector;
150 
151  // internal array used to optimise access to vector data
152  const Xt_int *sorted_vector; // sorted version of vector
153  int *sorted_vec_positions; // original positions of the
154  // indices in sorted_vector
155  /*
156  we have the following relations:
157  sorted_vector[i-1] <= sorted_vector[i],
158  vector[sorted_vec_positions[i]] = sorted_vector[i]
159  */
160 };
161 
162 
164  INSTR_DEF(t_idxvec_new,"xt_idxvec_new")
165  // ensure that yaxt is initialized
166  assert(xt_initialized());
167 
168  if (num_indices > 0)
169  ;
170  else if (num_indices == 0)
171  return xt_idxempty_new();
172  else
173  die("number of indices passed to xt_idxvec_new must not be negative!");
174 
175  INSTR_START(t_idxvec_new);
176  size_t vector_size = (size_t)num_indices * sizeof (idxvec[0]),
177  header_size = ((sizeof (struct Xt_idxvec_) + sizeof (Xt_int) - 1)
178  /sizeof (Xt_int)) * sizeof (Xt_int);
179  struct Xt_idxvec_ *restrict idxvec_obj = xmalloc(header_size + vector_size);
180  Xt_idxlist_init(&idxvec_obj->parent, &idxvec_vtable, num_indices);
181 
182  Xt_int *vector_assign = (Xt_int *)(void *)((unsigned char *)idxvec_obj + header_size);
183  idxvec_obj->vector = vector_assign;
184  memcpy(vector_assign, idxvec, vector_size);
185  idxvec_obj->sorted_vector = NULL;
186  idxvec_obj->sorted_vec_positions = NULL;
187 
188  INSTR_STOP(t_idxvec_new);
189  return (void *)idxvec_obj;
190 }
191 
192 Xt_idxlist xt_idxvec_prealloc_new(const Xt_int *idxvec, int num_indices)
193 {
194  if (num_indices > 0)
195  ;
196  else if (num_indices == 0)
197  return xt_idxempty_new();
198  else
199  die("number of indices passed to xt_idxvec_new must not be negative!");
200  struct Xt_idxvec_ *restrict idxvec_obj = xmalloc(sizeof (*idxvec_obj));
201  Xt_idxlist_init(&idxvec_obj->parent, &idxvec_vtable, num_indices);
202  idxvec_obj->vector = idxvec;
203  idxvec_obj->sorted_vector = NULL;
204  idxvec_obj->sorted_vec_positions = NULL;
205  return (void *)idxvec_obj;
206 }
207 
208 static size_t
210  int * sorted_vec_pos, int pos_offset) {
211 
212  Xt_int stride = stripe.stride, start;
213  int sign;
214  if (stride >= 0) {
215  sign = 1;
216  start = stripe.start;
217  } else {
218  sign = -1;
219  start = (Xt_int)(stripe.start + (Xt_int)(stripe.nstrides-1) * stride);
220  stride = (Xt_int)-stride;
221  pos_offset += stripe.nstrides-1;
222  }
223  for (int i = 0; i < stripe.nstrides; ++i) {
224  sorted_vector[i] = (Xt_int)(start + i * stride);
225  sorted_vec_pos[i] = pos_offset + i * sign;
226  }
227 
228  return (size_t)stripe.nstrides;
229 }
230 
231 #define MAX(a,b) ((a) >= (b) ? (a) : (b))
232 
233 static void
235  int num_stripes_,
236  Xt_idxvec idxvec) {
237 
238  if (num_stripes_ <= 0) {
239  idxvec->sorted_vector = NULL;
240  idxvec->sorted_vec_positions = NULL;
241  return;
242  }
243 
244  size_t num_stripes = (size_t)num_stripes_;
245  Xt_int *restrict sorted_vector_assign
246  = xmalloc((size_t)idxvec->parent.num_indices
247  * sizeof(*(idxvec->sorted_vector)));
248  idxvec->sorted_vector = sorted_vector_assign;
250  = idxvec->sorted_vec_positions
251  = xmalloc((size_t)idxvec->parent.num_indices *
252  sizeof(*(idxvec->sorted_vec_positions)));
253 
254  /* stripe_minmax[0][i] is the minimal index in stripe i at first,
255  * later of sorted stripe i, stripe_minmax[1][i] is the
256  * corresponding maximal index */
257  Xt_int (*restrict stripe_minmax)[num_stripes]
258  = xmalloc(2 * sizeof(*stripe_minmax));
259  int *restrict sorted_stripe_min_pos
260  = xmalloc(num_stripes * 3 * sizeof(*sorted_stripe_min_pos));
261 
262  for(size_t i = 0; i < num_stripes; ++i) {
263  Xt_int ofs = (Xt_int)(stripes[i].stride * (stripes[i].nstrides - 1)),
264  mask = Xt_isign_mask(ofs);
265  stripe_minmax[0][i] = (Xt_int)(stripes[i].start + (ofs & mask));
266  }
267 
268  xt_quicksort_index(stripe_minmax[0], (int)num_stripes,
269  sorted_stripe_min_pos, 1);
270 
271  int *restrict sorted_pos_prefix_sum = sorted_stripe_min_pos + num_stripes,
272  *restrict orig_pos_prefix_sum
273  = xmalloc(num_stripes * sizeof(*orig_pos_prefix_sum));
274 
275  int accum = 0;
276  for (size_t i = 0; i < num_stripes; ++i) {
277  orig_pos_prefix_sum[i] = accum;
278  accum += stripes[i].nstrides;
279  }
280 
281  for (size_t i = 0; i < num_stripes; ++i) {
282  int sorted_pos = sorted_stripe_min_pos[i];
283  sorted_pos_prefix_sum[i] = orig_pos_prefix_sum[sorted_pos];
284  Xt_int ofs = (Xt_int)(stripes[sorted_pos].stride
285  * (stripes[sorted_pos].nstrides - 1)),
286  mask = Xt_isign_mask(ofs);
287  stripe_minmax[1][i] = (Xt_int)(stripes[sorted_pos].start + (ofs & ~mask));
288  }
289 
290  free(orig_pos_prefix_sum);
291 
292  /* i'th stripe overlaps with overlap_count[i] following stripes, or
293  * is part of a stretch of this many overlapping stripes, if
294  * overlap_count[i] is > 0, in case overlap_count[i] <= 0, this many
295  * non-overlapping stripes follow after negation + 1 */
296  int *restrict overlap_count
297  = sorted_stripe_min_pos + 2 * num_stripes;
298  for (size_t i = 0; i < num_stripes - 1; ++i) {
299  bool do_overlap = stripe_minmax[1][i] >= stripe_minmax[0][i + 1];
300  size_t j = i + 1;
301  if (do_overlap) {
302  /* range_max_idx is the maximal index encountered in a rage of
303  * overlapping stripes, only stop when a stripe starting at
304  * index larger than this is encountered */
305  Xt_int range_max_idx = MAX(stripe_minmax[1][i], stripe_minmax[1][i+1]);
306  while (j + 1 < num_stripes
307  && stripe_minmax[0][j + 1] <= range_max_idx) {
308  range_max_idx = MAX(range_max_idx, stripe_minmax[1][j+1]);
309  ++j;
310  }
311  overlap_count[i] = (int)(j - i);
312  i = j;
313  } else {
314  while (j + 1 < num_stripes
315  && stripe_minmax[0][j + 1] > stripe_minmax[1][j])
316  ++j;
317  overlap_count[i] = -(int)(j - i - 1);
318  i = j - 1;
319  }
320  }
321  overlap_count[num_stripes - 1] = 0;
322 
323  size_t offset = 0;
324 
325  size_t i = 0;
326  while (i < num_stripes) {
327 
328  bool do_overlap = overlap_count[i] > 0;
329  size_t num_selection = (size_t)(abs(overlap_count[i])) + 1;
330  size_t sel_size = 0;
331 
332  for (size_t j = 0; j < num_selection; ++j)
333  sel_size += decode_stripe(stripes[sorted_stripe_min_pos[i+j]],
334  sorted_vector_assign + offset + sel_size,
335  sorted_vec_positions + offset + sel_size,
336  sorted_pos_prefix_sum[i+j]);
337 
338  if (do_overlap)
339  xt_quicksort_xt_int_permutation(sorted_vector_assign + offset,
340  sel_size,
341  sorted_vec_positions + offset);
342 
343  offset += sel_size;
344  i += num_selection;
345  }
346 
347  free(sorted_stripe_min_pos);
348  free(stripe_minmax);
349 }
350 
352 xt_idxvec_from_stripes_new(const struct Xt_stripe stripes[],
353  int num_stripes) {
354  // ensure that yaxt is initialized
355  assert(xt_initialized());
356 
357  long long num_indices = 0;
358 
359  for (int i = 0; i < num_stripes; ++i)
360  num_indices += stripes[i].nstrides;
361  assert((sizeof (long long) > sizeof (int)) & (num_indices <= INT_MAX)
362  & (num_indices >= 0));
363 
364  Xt_idxlist idxlist;
365  if (num_indices > 0) {
366  size_t vector_size = (size_t)num_indices * sizeof (Xt_int),
367  header_size = ((sizeof (struct Xt_idxvec_) + sizeof (Xt_int) - 1)
368  /sizeof (Xt_int)) * sizeof (Xt_int);
369  Xt_idxvec idxvec_obj = xmalloc(header_size + vector_size);
370  Xt_int *restrict indices
371  = (Xt_int *)(void *)((unsigned char *)idxvec_obj + header_size);
372  idxvec_obj->vector = indices;
373 
374  size_t k = (size_t)-1;
375  for (int i = 0; i < num_stripes; ++i)
376  for (int j = 0; j < stripes[i].nstrides; ++j)
377  indices[++k] = (Xt_int)(stripes[i].start + j * stripes[i].stride);
378 
379  Xt_idxlist_init(&idxvec_obj->parent, &idxvec_vtable, (int)num_indices);
380 
381  generate_sorted_vector_from_stripes(stripes, num_stripes, idxvec_obj);
382  idxlist = (Xt_idxlist)idxvec_obj;
383  } else
384  idxlist = xt_idxempty_new();
385  return idxlist;
386 }
387 
388 static void idxvec_delete(Xt_idxlist obj) {
389 
390  if (((Xt_idxvec)obj)->sorted_vector != ((Xt_idxvec)obj)->vector)
391  free((void *)(((Xt_idxvec)obj)->sorted_vector));
392  free(((Xt_idxvec)obj)->sorted_vec_positions);
393  free(obj);
394 }
395 
396 static size_t idxvec_get_pack_size(Xt_idxlist obj, MPI_Comm comm) {
397 
398  Xt_idxvec idxvec = (Xt_idxvec)obj;
399  int size_xt_idx, size_header;
400 
401  xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
402  xt_mpi_call(MPI_Pack_size(idxvec->parent.num_indices, Xt_int_dt, comm,
403  &size_xt_idx), comm);
404 
405  return (size_t)size_xt_idx + (size_t)size_header;
406 }
407 
408 void idxvec_pack(Xt_idxlist obj, void *buffer, int buffer_size,
409  int *position, MPI_Comm comm) {
410 
411  assert(obj);
412  Xt_idxvec idxvec = (Xt_idxvec)obj;
413  int header[2] = { VECTOR, idxvec->parent.num_indices };
414  xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
415  buffer_size, position, comm), comm);
416  if (idxvec->parent.num_indices != 0)
417  xt_mpi_call(MPI_Pack(CAST_MPI_SEND_BUF(idxvec->vector),
418  idxvec->parent.num_indices,
419  Xt_int_dt, buffer,
420  buffer_size, position, comm), comm);
421 }
422 
423 Xt_idxlist xt_idxvec_unpack(void *buffer, int buffer_size, int *position,
424  MPI_Comm comm) {
425 
426  int num_indices;
427 
428  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
429  &num_indices, 1, MPI_INT, comm), comm);
430 
431  size_t vector_size = (size_t)num_indices * sizeof (Xt_int),
432  header_size = ((sizeof (struct Xt_idxvec_) + sizeof (Xt_int) - 1)
433  /sizeof (Xt_int)) * sizeof (Xt_int);
434  Xt_idxvec idxvec = xmalloc(header_size + vector_size);
435  Xt_idxlist_init(&idxvec->parent, &idxvec_vtable, num_indices);
436 
437  Xt_int *vector_assign = (Xt_int *)(void *)((unsigned char *)idxvec + header_size);
438  idxvec->vector = vector_assign;
439  if (num_indices != 0) {
440  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
441  vector_assign, num_indices,
442  Xt_int_dt, comm), comm);
443  } else {
444  fputs("warning: implementation generated empty vector!\n", stderr);
445  idxvec->vector = NULL;
446  }
447 
448  idxvec->sorted_vector = NULL;
449  idxvec->sorted_vec_positions = NULL;
450 
451  return (Xt_idxlist)idxvec;
452 }
453 
454 static const Xt_int *
456 
457  if (idxvec->sorted_vector != NULL)
458  return idxvec->sorted_vector;
459 
460  size_t num_indices = (size_t)idxvec->parent.num_indices;
461  idxvec->sorted_vec_positions = xmalloc((size_t)num_indices *
462  sizeof(*(idxvec->sorted_vec_positions)));
463 
464 
465  const Xt_int *restrict vector = idxvec->vector;
466  bool sorted = true;
467  // check if we are already sorted:
468  for (size_t i = 1; i < num_indices; ++i)
469  sorted &= (vector[i-1] <= vector[i]);
470 
471  /* we are done if vector is already sorted */
472  if (sorted) {
473  // gen id-map:
474  for (size_t i = 0; i < num_indices; ++i)
475  idxvec->sorted_vec_positions[i] = (int)i;
476  return idxvec->sorted_vector = vector;
477  }
478  size_t svec_size = num_indices * sizeof(Xt_int);
479  Xt_int *sorted_vector = xmalloc(svec_size);
480 
481  memcpy(sorted_vector, vector, svec_size);
482 
483  xt_quicksort_index(sorted_vector, (int)num_indices,
484  idxvec->sorted_vec_positions, 1);
485 
486  idxvec->sorted_vector = sorted_vector;
487 
488  return sorted_vector;
489 }
490 
493 
494  // both lists are index vectors:
495 
496  Xt_idxvec idxvec_src = (Xt_idxvec)idxlist_src,
497  idxvec_dst = (Xt_idxvec)idxlist_dst;
498 
499 
500  size_t num_indices_inter = 0,
501  num_indices_src = (size_t)idxvec_src->parent.num_indices,
502  num_indices_dst = (size_t)idxvec_dst->parent.num_indices;
503 
504  size_t vector_size = num_indices_dst * sizeof (idxvec_dst->vector[0]),
505  header_size = ((sizeof (struct Xt_idxvec_) + sizeof (Xt_int) - 1)
506  /sizeof (Xt_int)) * sizeof (Xt_int);
507 
508  Xt_idxvec inter_vector = xmalloc(header_size + vector_size);
509 
510  Xt_int *vector_assign
511  = (Xt_int *)(void *)((unsigned char *)inter_vector + header_size);
512  inter_vector->vector = vector_assign;
513 
514 
515  // get sorted indices of source and destination
516  const Xt_int *restrict sorted_src_vector = get_sorted_vector(idxvec_src),
517  *restrict sorted_dst_vector = get_sorted_vector(idxvec_dst);
518 
519  // compute the intersection
520  for (size_t i = 0, j = 0; i < num_indices_dst; ++i) {
521 
522  while (j < num_indices_src &&
523  sorted_src_vector[j] < sorted_dst_vector[i]) ++j;
524  if (j >= num_indices_src) break;
525  if (sorted_src_vector[j] == sorted_dst_vector[i])
526  vector_assign[num_indices_inter++] = sorted_dst_vector[i];
527  }
528 
529  if (num_indices_inter) {
530  vector_size = (size_t)num_indices_inter * sizeof (idxvec_dst->vector[0]);
531  inter_vector = xrealloc(inter_vector, header_size + vector_size);
532  inter_vector->vector
533  = (Xt_int *)(void *)((unsigned char *)inter_vector + header_size);
534  } else {
535  free(inter_vector);
536  return xt_idxempty_new();
537  }
538 
539  Xt_idxlist_init(&inter_vector->parent, &idxvec_vtable, (int)num_indices_inter);
540  inter_vector->sorted_vector = NULL;
541  inter_vector->sorted_vec_positions = NULL;
542 
543  return (Xt_idxlist)inter_vector;
544 }
545 
546 static Xt_idxlist
548 
549  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
550 
551  return xt_idxvec_new(idxvec_obj->vector, idxvec_obj->parent.num_indices);
552 }
553 
554 static void
556 
557  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
558 
559  memcpy(indices, idxvec_obj->vector,
560  (size_t)idxvec_obj->parent.num_indices * sizeof(*indices));
561 }
562 
563 static Xt_int const*
565  Xt_idxvec idxvec = (Xt_idxvec)idxlist;
566 
567  return idxvec->vector;
568 }
569 
570 
571 static void
572 idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe ** stripes,
573  int * num_stripes) {
574 
575  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
576 
578  idxvec_obj->parent.num_indices,
579  stripes, num_stripes);
580 }
581 
582 static int
583 idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int * index) {
584 
585  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
586 
587  if (position < 0 || position >= idxvec_obj->parent.num_indices)
588  return 1;
589 
590  *index = idxvec_obj->vector[position];
591 
592  return 0;
593 }
594 
595 static int
597  const int *restrict positions,
598  int num_pos_, Xt_int *index,
599  Xt_int undef_idx) {
600 
601  Xt_idxvec idxvec = (Xt_idxvec)idxlist;
602  size_t num_indices = (size_t)idxvec->parent.num_indices;
603  const Xt_int *restrict v = idxvec->vector;
604 
605  int undef_count = 0;
606  size_t num_pos = num_pos_ >= 0 ? (size_t)num_pos_ : (size_t)0;
607  for (size_t ip = 0; ip < num_pos; ip++) {
608  int p = positions[ip];
609  if (p >= 0 && (size_t)p < num_indices) {
610  index[ip] = v[p];
611  } else {
612  index[ip] = undef_idx;
613  undef_count++;
614  }
615  }
616 
617  return undef_count;
618 }
619 
623 static int
625  int * position, int offset) {
626 
627  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
628 
629  *position = -1;
630 
631  size_t num_indices = (size_t)idxvec_obj->parent.num_indices;
632  if ((offset < 0) || ((size_t)offset >= num_indices))
633  return 1;
634 
635  const Xt_int *sorted_vector = get_sorted_vector(idxvec_obj);
636 
637  if ((index < sorted_vector[0]) ||
638  (index > sorted_vector[num_indices-1]))
639  return 1;
640 
641  // bisection to find one matching position:
642  size_t lb = 0;
643  size_t ub = num_indices - 1;
644 
645  while (sorted_vector[lb] < index) {
646 
647  size_t middle = (ub + lb + 1)/2;
648 
649  if (sorted_vector[middle] <= index)
650  lb = middle;
651  else if (ub == middle)
652  return 1;
653  else
654  ub = middle;
655  }
656 
657  // find left most match:
658  while (lb > 0 && sorted_vector[lb-1] == index) --lb;
659 
660  // go forward until offset condition is satisfied:
661  while (lb < num_indices - 1 && // boundary condition
662  idxvec_obj->sorted_vec_positions[lb] < offset && // ignore postions left of offset
663  sorted_vector[lb] == index) ++lb; // check if index is valid
664 
665  // check if position is invalid:
666  if (lb >= num_indices || sorted_vector[lb] != index)
667  return 1; // failure
668 
669  // result:
670  *position = idxvec_obj->sorted_vec_positions[lb];
671  return 0;
672 }
673 
674 static int
675 idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int * position) {
676 
677  return idxvec_get_position_of_index_off(idxlist, index, position, 0);
678 }
679 
680 static bool idx_vec_is_sorted(Xt_int const *idx, size_t n) {
681 
682  if (n>=2)
683  for (size_t i = 1; i < n; i++)
684  if (idx[i] < idx[i-1]) return false;
685 
686  return true;
687 }
688 
689 static int
691  const Xt_int *selection_idx,
692  int num_selection, int *positions,
693  int single_match_only) {
694 
695  if (num_selection <= 0) return 0;
696 
697  bool selection_is_ordered = idx_vec_is_sorted(selection_idx, (size_t)num_selection);
699 
700  Xt_int const *sorted_selection;
701  int *sorted_selection_pos = NULL;
702  Xt_int *tmp_idx = NULL;
703 
704  if (selection_is_ordered) {
705  sorted_selection = selection_idx;
706  } else {
707  size_t idx_memsize = (size_t)num_selection * sizeof(*sorted_selection),
708  pos_memsize = (size_t)num_selection * sizeof(*sorted_selection_pos),
709 #if defined _CRAYC && _RELEASE_MAJOR < 9
710  pos_ofs_roundup = _MAXVL_8,
711 #else
712  pos_ofs_roundup = sizeof(int),
713 #endif
714  /* round pos_memsize up to next multiple of sizeof (int) */
715  pos_ofs = ((idx_memsize + pos_ofs_roundup - 1)
716  & ((size_t)-(ssize_t)(pos_ofs_roundup))),
717  /* compute size of merged allocation */
718  alloc_size = pos_ofs + pos_memsize;
719 
720  tmp_idx = xmalloc(alloc_size);
721  memcpy(tmp_idx, selection_idx, idx_memsize);
722 
723  sorted_selection_pos
724  = (void *)((unsigned char *)tmp_idx + pos_ofs);
725  xt_quicksort_index(tmp_idx, num_selection, sorted_selection_pos, 1);
726  sorted_selection = tmp_idx;
727  }
728 
729  /* motivation for usage of single_match_only:
730  * on the target side we want single_match_only,
731  * on the source side we don't
732  */
733  Xt_idxvec body_idxvec = (Xt_idxvec)body_idxlist;
734  const Xt_int *sorted_body = get_sorted_vector(body_idxvec);
735  const int *sorted_body_pos = body_idxvec->sorted_vec_positions;
736  size_t search_end = (size_t)body_idxvec->parent.num_indices - 1;
737  int num_unmatched = 0;
738 
739  // after the match we will move on one step in order to avoid matching the same position again
740  size_t post_match_step = single_match_only != 0;
741 
742  size_t i=0;
743  for (size_t search_start = 0, ub_guess_ofs = 1;
744  i < (size_t)num_selection && search_start<=search_end;
745  ++i) {
746  size_t selection_pos = selection_is_ordered ? i : (size_t)sorted_selection_pos[i];
747  Xt_int isel = sorted_selection[i];
748  // bisection to find one matching position:
749  size_t ub = MIN(search_start + ub_guess_ofs, search_end);
750  size_t lb = search_start;
751  /* guess too low? */
752  if (sorted_body[ub] < isel) {
753  lb = MIN(ub + 1, search_end);
754  ub = search_end;
755  }
756  /* dividing (ub-lb) by 2 gives 0 iff (ub-lb) < 2 but uses less
757  * instructions than comparing to literal 1 */
758  while ((ub-lb)/16) {
759  size_t middle = (ub + lb + 1) / 2;
760  /* todo: make branch free with mask/inv mask by predicate */
761  if (sorted_body[middle] <= isel)
762  lb = middle;
763  else
764  ub = middle;
765  }
766  /* use linear scan for last part of search */
767  while (sorted_body[lb] < isel && lb < ub)
768  ++lb;
769  size_t match_pos;
770  // search is now narrowed to two positions, select one of them:
771  if (isel == sorted_body[lb]) {
772  match_pos = lb;
773  } else {
774  num_unmatched++;
775  positions[selection_pos] = -1;
776  continue;
777  }
778 
779  // find left most match >= search_start (bisection can lead to any match >= search_start)
780  while (match_pos > search_start && sorted_body[match_pos-1] == isel)
781  --match_pos;
782 
783  // result:
784  // update positions and prepare next search:
785  positions[selection_pos] = sorted_body_pos[match_pos];
786  ub_guess_ofs = match_pos - search_start;
787  search_start = match_pos + post_match_step;
788  }
789  if (i < (size_t)num_selection) {
790  num_unmatched += (int)((size_t)num_selection - i);
791  if (selection_is_ordered)
792  do {
793  positions[i] = -1;
794  } while (++i < (size_t)num_selection);
795  else
796  do {
797  positions[sorted_selection_pos[i]] = -1;
798  } while (++i < (size_t)num_selection);
799  }
800  if (tmp_idx) free(tmp_idx);
801 
802  return num_unmatched;
803 }
804 
805 static Xt_int
807 
808  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
809 
810 #ifndef NDEBUG
811  if (!idxvec_obj->parent.num_indices)
812  die("idxvec_get_min_index: empty index vector");
813 #endif
814 
815  return get_sorted_vector(idxvec_obj)[0];
816 }
817 
818 static Xt_int
820 
821  Xt_idxvec idxvec_obj = (Xt_idxvec)idxlist;
822 
823 #ifndef NDEBUG
824  if (!idxvec_obj->parent.num_indices)
825  die("idxvec_get_max_index: empty index vector");
826 #endif
827 
828  return get_sorted_vector(idxvec_obj)[idxvec_obj->parent.num_indices-1];
829 }
830 
831 /*
832  * Local Variables:
833  * c-basic-offset: 2
834  * coding: utf-8
835  * indent-tabs-mode: nil
836  * show-trailing-whitespace: t
837  * require-trailing-newline: t
838  * End:
839  */
int MPI_Comm
Definition: core.h:64
#define die(msg)
Definition: core.h:131
#define INSTR_STOP(T)
Definition: instr.h:69
#define INSTR_DEF(T, S)
Definition: instr.h:66
#define INSTR_START(T)
Definition: instr.h:68
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition: ppm_xfuncs.h:71
#define xmalloc(size)
Definition: ppm_xfuncs.h:70
void xt_quicksort_index(Xt_int *v_idx, int n, int *v_pos, int reset_pos)
Definition: quicksort.c:80
void xt_quicksort_xt_int_permutation(Xt_int *a, size_t n, int *restrict permutation)
quicksort declaration
const Xt_int * sorted_vector
Definition: xt_idxvec.c:152
struct Xt_idxlist_ parent
Definition: xt_idxvec.c:147
int * sorted_vec_positions
Definition: xt_idxvec.c:153
const Xt_int * vector
Definition: xt_idxvec.c:149
Xt_int stride
Definition: xt_stripe.h:56
int nstrides
Definition: xt_stripe.h:57
Xt_int start
Definition: xt_stripe.h:55
void(* delete)(Xt_idxlist)
static Xt_int Xt_isign_mask(Xt_int x)
base definitions header file
int xt_initialized(void)
Definition: xt_init.c:107
#define Xt_int_dt
Definition: xt_core.h:69
XT_INT Xt_int
Definition: xt_core.h:68
struct Xt_idxlist_ * Xt_idxlist
Definition: xt_core.h:80
Xt_idxlist xt_idxempty_new(void)
Definition: xt_idxempty.c:165
index list declaration
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ VECTOR
static Xt_int idxvec_get_min_index(Xt_idxlist idxlist)
Definition: xt_idxvec.c:806
Xt_idxlist xt_idxvec_prealloc_new(const Xt_int *idxvec, int num_indices)
Definition: xt_idxvec.c:192
static int idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
Definition: xt_idxvec.c:675
static void idxvec_delete(Xt_idxlist data)
Definition: xt_idxvec.c:388
static int idxvec_get_positions_of_indices(Xt_idxlist idxlist, const Xt_int *indices, int num_indices, int *positions, int single_match_only)
Definition: xt_idxvec.c:690
static void idxvec_get_indices(Xt_idxlist idxlist, Xt_int *indices)
Definition: xt_idxvec.c:555
Xt_idxlist xt_idxvec_new(const Xt_int *idxvec, int num_indices)
Definition: xt_idxvec.c:163
#define MIN(a, b)
Definition: xt_idxvec.c:72
static void idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Definition: xt_idxvec.c:572
static Xt_idxlist idxvec_copy(Xt_idxlist idxlist)
Definition: xt_idxvec.c:547
static int idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
Definition: xt_idxvec.c:583
static void idxvec_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
Definition: xt_idxvec.c:408
static size_t idxvec_get_pack_size(Xt_idxlist data, MPI_Comm comm)
Definition: xt_idxvec.c:396
static bool idx_vec_is_sorted(Xt_int const *idx, size_t n)
Definition: xt_idxvec.c:680
static int idxvec_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static int idxvec_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
Definition: xt_idxvec.c:624
static const struct xt_idxlist_vtable idxvec_vtable
Definition: xt_idxvec.c:122
static Xt_int const * idxvec_get_indices_const(Xt_idxlist idxlist)
Definition: xt_idxvec.c:564
struct Xt_idxvec_ * Xt_idxvec
Definition: xt_idxvec.c:142
static const Xt_int * get_sorted_vector(Xt_idxvec idxvec)
Definition: xt_idxvec.c:455
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe stripes[], int num_stripes)
Definition: xt_idxvec.c:352
static Xt_int idxvec_get_max_index(Xt_idxlist idxlist)
Definition: xt_idxvec.c:819
static size_t decode_stripe(struct Xt_stripe stripe, Xt_int *sorted_vector, int *sorted_vec_pos, int pos_offset)
Definition: xt_idxvec.c:209
static void generate_sorted_vector_from_stripes(const struct Xt_stripe stripes[], int num_stripes_, Xt_idxvec idxvec)
Definition: xt_idxvec.c:234
Xt_idxlist xt_idxvec_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
Definition: xt_idxvec.c:423
#define MAX(a, b)
Definition: xt_idxvec.c:231
Xt_idxlist xt_idxvec_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
Definition: xt_idxvec.c:492
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition: xt_mpi.h:68
void xt_convert_indices_to_stripes(const Xt_int *indices, int num_indices, struct Xt_stripe **stripes, int *num_stripes)