Yet Another eXchange Tool  0.9.0
xt_xmap_intersection_ext.c
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jörg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifdef HAVE_CONFIG_H
47 #include <config.h>
48 #endif
49 
50 #include <assert.h>
51 #include <limits.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 
57 #include <mpi.h>
58 
59 #include "xt/xt_idxlist.h"
60 #include "xt/xt_idxvec.h"
61 #include "xt/xt_xmap.h"
62 #include "xt_xmap_internal.h"
63 #include "xt/xt_mpi.h"
64 #include "xt_mpi_internal.h"
65 #include "core/core.h"
66 #include "core/ppm_xfuncs.h"
69 #include "xt_arithmetic_util.h"
70 #include "ensure_array_size.h"
71 #include "xt_cover.h"
72 #include "xt/quicksort.h"
73 
77 static void
79 static void
84 static void xmap_intersection_ext_delete(Xt_xmap xmap);
87 static Xt_xmap
89 static Xt_xmap
91  const int * src_positions,
92  const int * dst_positions);
93 static Xt_xmap
94 xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions,
95  const int src_displacements[num_repetitions],
96  const int dst_displacements[num_repetitions]);
97 
98 
99 static const struct Xt_xmap_vtable xmap_intersection_vtable = {
101  .get_num_destinations = xmap_intersection_ext_get_num_destinations,
102  .get_num_sources = xmap_intersection_ext_get_num_sources,
103  .get_destination_ranks = xmap_intersection_ext_get_destination_ranks,
104  .get_source_ranks = xmap_intersection_ext_get_source_ranks,
105  .get_out_iterator = xmap_intersection_ext_get_out_iterator,
106  .get_in_iterator = xmap_intersection_ext_get_in_iterator,
109  .get_max_src_pos = xmap_intersection_ext_get_max_src_pos,
110  .get_max_dst_pos = xmap_intersection_ext_get_max_dst_pos,
113  .spread = xmap_intersection_ext_spread};
114 
115 struct exchange_ext {
116  // list of relative position extents in index list to send or receive
118  /* generated on-demand */
121  int rank;
122 };
123 
125 
126  const struct Xt_xmap_vtable * vtable;
127 
128  int n_in, n_out;
129 
130  // we need the max position in order to enable quick range-checks
131  // for xmap-users like redist
132  int max_src_pos; // max possible pos over all src transfer_pos (always >= 0)
133  int max_dst_pos; // same for dst
134  int tag_offset; /* offset to add to message tags for uniqueness */
136  struct exchange_ext msg[];
137 };
138 
140 
141 static inline Xt_xmap_intersection_ext xmie(void *xmap)
142 {
143  return (Xt_xmap_intersection_ext)xmap;
144 }
145 
147 {
148  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
149  return xmap_intersection_ext->comm;
150 }
151 
153 {
154  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
155  // the number of destination equals the number of source messages
156  return xmap_intersection_ext->n_out;
157 }
158 
160 {
161  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
162  // the number of sources equals the number of destination messages
163  return xmap_intersection_ext->n_in;
164 }
165 
166 static void
168 {
169  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
170  size_t n_out = (size_t)xmap_intersection_ext->n_out;
171  const struct exchange_ext *restrict out_msg
172  = xmap_intersection_ext->msg + xmap_intersection_ext->n_in;
173  for (size_t i = 0; i < n_out; ++i)
174  ranks[i] = out_msg[i].rank;
175 }
176 
177 static void
179 {
180  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
181  size_t n_in = (size_t)xmap_intersection_ext->n_in;
182  const struct exchange_ext *restrict in_msg = xmap_intersection_ext->msg;
183  for (size_t i = 0; i < n_in; ++i)
184  ranks[i] = in_msg[i].rank;
185 }
186 
188  return xmie(xmap)->max_src_pos;
189 }
190 
192  return xmie(xmap)->max_dst_pos;
193 }
194 
195 typedef int (*Xt_pos_ext_copy)(size_t num_orig_pos_ext,
196  size_t *num_pos_ext,
197  struct Xt_pos_ext **pos_ext,
198  const struct Xt_pos_ext *orig_pos_ext,
199  size_t num_orig_pos, const int *orig_pos,
200  void *state);
201 
202 static int
203 pos_ext_copy_verbatim(size_t num_orig_pos_ext,
204  size_t *num_pos_ext,
205  struct Xt_pos_ext **pos_ext,
206  const struct Xt_pos_ext *orig_pos_ext,
207  size_t num_orig_pos, const int *orig_pos,
208  void *state)
209 {
210  (void)state; (void)num_orig_pos; (void)orig_pos;
211  size_t size_pos_ext = num_orig_pos_ext * sizeof (**pos_ext);
212  struct Xt_pos_ext *pos_ext_ = *pos_ext = xmalloc(size_pos_ext);
213  memcpy(pos_ext_, orig_pos_ext, size_pos_ext);
214  *num_pos_ext = num_orig_pos_ext;
215  return -1;
216 }
217 
218 static void
220  const struct exchange_ext *restrict msg,
221  int *nmsg_copy,
222  struct exchange_ext *restrict msg_copy,
223  int *max_pos_, int num_repetitions,
224  Xt_pos_ext_copy pos_ext_copy, void *pec_state)
225 {
226  *nmsg_copy = (int)nmsg;
227  int max_pos = 0;
228  for (size_t i = 0; i < nmsg; ++i) {
229  msg_copy[i].num_transfer_pos = num_repetitions * msg[i].num_transfer_pos;
230  msg_copy[i].rank = msg[i].rank;
231  msg_copy[i].transfer_pos = NULL;
232  size_t num_transfer_pos_ext;
233  int new_max_pos
234  = pos_ext_copy((size_t)msg[i].num_transfer_pos_ext, &num_transfer_pos_ext,
235  &msg_copy[i].transfer_pos_ext, msg[i].transfer_pos_ext,
236  (size_t)msg[i].num_transfer_pos, msg[i].transfer_pos,
237  pec_state);
238  if (new_max_pos > max_pos)
239  max_pos = new_max_pos;
240  msg_copy[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
241  }
242  if (pos_ext_copy != pos_ext_copy_verbatim)
243  *max_pos_ = max_pos;
244 }
245 
246 static Xt_xmap
247 xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions,
248  Xt_pos_ext_copy pe_cpy_in, void *peci_state,
249  Xt_pos_ext_copy pe_cpy_out, void *peco_state)
250 {
251  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap),
252  xmap_intersection_ext_new;
253  size_t n_in = (size_t)xmap_intersection_ext->n_in,
254  n_out = (size_t)xmap_intersection_ext->n_out,
255  num_isect = n_in + n_out;
256  xmap_intersection_ext_new
257  = xmalloc(sizeof (*xmap_intersection_ext_new)
258  + num_isect * sizeof (struct exchange_ext));
259  xmap_intersection_ext_new->vtable = xmap_intersection_ext->vtable;
260  xmap_intersection_ext_new->n_in = (int)n_in;
261  xmap_intersection_ext_new->n_out = (int)n_out;
262  xmap_intersection_ext_new->max_src_pos = xmap_intersection_ext->max_src_pos;
263  xmap_intersection_ext_new->max_dst_pos = xmap_intersection_ext->max_dst_pos;
264  xmap_intersection_ext_msg_copy(n_in, xmap_intersection_ext->msg,
265  &xmap_intersection_ext_new->n_in,
266  xmap_intersection_ext_new->msg,
267  &xmap_intersection_ext_new->max_dst_pos,
268  num_repetitions, pe_cpy_in, peci_state);
269  xmap_intersection_ext_msg_copy(n_out, xmap_intersection_ext->msg+n_in,
270  &xmap_intersection_ext_new->n_out,
271  xmap_intersection_ext_new->msg+n_in,
272  &xmap_intersection_ext_new->max_src_pos,
273  num_repetitions, pe_cpy_out, peco_state);
274  xmap_intersection_ext_new->comm
275  = xt_mpi_comm_smart_dup(xmap_intersection_ext->comm,
276  &xmap_intersection_ext_new->tag_offset);
277  return (Xt_xmap)xmap_intersection_ext_new;
278 }
279 
280 static Xt_xmap
282 {
283  return xmap_intersection_ext_copy_(xmap, 1,
284  pos_ext_copy_verbatim, NULL,
285  pos_ext_copy_verbatim, NULL);
286 }
287 
288 
289 static void
290 xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
291 {
292  for (size_t i = 0; i < num_msg; ++i) {
293  free(msg[i].transfer_pos);
294  free(msg[i].transfer_pos_ext);
295  }
296 }
297 
299 
300  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
301  size_t num_isect = (size_t)xmap_intersection_ext->n_in
302  + (size_t)xmap_intersection_ext->n_out;
303  xt_free_exchange_ext(num_isect, xmap_intersection_ext->msg);
304  xt_mpi_comm_smart_dedup(&xmap_intersection_ext->comm,
305  xmap_intersection_ext->tag_offset);
306  free(xmap_intersection_ext);
307 }
308 
309 static void
311  int num_src_intersections,
312  const struct Xt_com_list src_com[num_src_intersections],
313  int num_dst_intersections,
314  const struct Xt_com_list dst_com[num_dst_intersections],
315  Xt_idxlist src_idxlist_local,
316  Xt_idxlist dst_idxlist_local,
317  MPI_Comm comm);
318 
319 Xt_xmap
320 xt_xmap_intersection_ext_new(int num_src_intersections,
321  const struct Xt_com_list
322  src_com[num_src_intersections],
323  int num_dst_intersections,
324  const struct Xt_com_list
325  dst_com[num_dst_intersections],
326  Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
327  MPI_Comm comm) {
328 
329  // ensure that yaxt is initialized
330  assert(xt_initialized());
331 
332  size_t num_isect
333  = (size_t)num_dst_intersections + (size_t)num_src_intersections;
335  = xmalloc(sizeof (*xmap) + num_isect * sizeof (struct exchange_ext));
336 
338 
339  xmap->comm = comm = xt_mpi_comm_smart_dup(comm, &xmap->tag_offset);
340 
341  // generate exchange lists
343  num_src_intersections, src_com,
344  num_dst_intersections, dst_com,
345  src_idxlist, dst_idxlist, comm);
346 
347  size_t new_num_isect = (size_t)xmap->n_in + (size_t)xmap->n_out;
348  if (new_num_isect != num_isect)
349  xmap = xrealloc(xmap, sizeof (*xmap) + (new_num_isect
350  * sizeof(struct exchange_ext)));
351 
352  xmap->max_dst_pos = xt_idxlist_get_num_indices(dst_idxlist) - 1;
353 
354  return (Xt_xmap)xmap;
355 }
356 
357 struct ted_result {
358  struct Xt_pos_ext_vec cover;
359  int resCount;
360 };
361 
362 static struct ted_result
364  int num_intersections,
365  const struct Xt_com_list intersections[num_intersections],
366  Xt_idxlist mypart_idxlist,
367  struct exchange_ext *resSets,
368  int (*restrict dst_removals_per_intersection)[2]);
369 
370 
371 static struct Xt_pos_ext *
373  int num_src_intersections,
374  const struct Xt_com_list src_com[num_src_intersections],
375  int num_dst_intersections,
376  const struct Xt_com_list dst_com[num_dst_intersections],
377  struct exchange_ext dst_ext[num_dst_intersections],
378  int (*restrict src_removals_per_intersection)[2],
379  const int (*restrict dst_removals_per_intersection)[2],
380  int tag_offset,
381  MPI_Comm comm);
382 
383 static void
384 remap_dst_intersections(int num_dst_intersections,
385  const struct Xt_com_list dst_com[num_dst_intersections],
386  Xt_idxlist mypart_idxlist,
387  int resCount,
388  struct exchange_ext resSets[resCount],
389  const int (*removals_per_intersection)[2]);
390 
391 struct tes_result {
392  int resCount;
393  int max_pos;
394 };
395 
396 static struct tes_result
398  int num_intersections,
399  const struct Xt_com_list intersections[num_intersections],
400  Xt_idxlist mypart_idxlist,
401  struct exchange_ext *resSets,
402  const int (*restrict removals_per_intersection)[2],
403  const struct Xt_pos_ext *pos_updates);
404 
405 static void
407  int num_src_intersections,
408  const struct Xt_com_list src_com[num_src_intersections],
409  int num_dst_intersections,
410  const struct Xt_com_list dst_com[num_dst_intersections],
411  Xt_idxlist src_idxlist,
412  Xt_idxlist dst_idxlist,
413  MPI_Comm comm) {
414 
415  /* {dst|src}_removals_per_intersection[i][0] denotes the number of
416  * indices to be removed from the intersection with {src|dst}_com[i].rank.
417  * {dst|src}_removals_per_intersection[rank][1] denotes the number of
418  * pos_ext needed to represent this change (0 if either none or all
419  * indices got removed).
420  */
421  int (*src_removals_per_intersection)[2] =
422  xmalloc(((size_t)num_dst_intersections + (size_t)num_src_intersections)
423  * sizeof(*src_removals_per_intersection)),
424  (*dst_removals_per_intersection)[2]
425  = src_removals_per_intersection + num_src_intersections;
426 
427  {
428  struct ted_result tedr
430  num_dst_intersections, dst_com, dst_idxlist,
431  xmap->msg, dst_removals_per_intersection);
432  struct Xt_pos_ext_vec cover = tedr.cover;
433  if (!xt_idxlist_pos_ext_is_full_cover(dst_idxlist, cover)) {
434  if (xt_idxlist_get_num_indices(dst_idxlist) == 0)
435  Xt_abort(comm, "ERROR: ups...this should not have happend...", __FILE__,
436  __LINE__);
437  int first_missing_pos
438  = ((cover.num_pos_ext > 0) && (cover.pos_ext[0].start == 0))
439  ? cover.pos_ext[0].start + cover.pos_ext[0].size : 0;
440  print_miss_msg(dst_idxlist, first_missing_pos, comm, __FILE__, __LINE__);
441  }
442  xt_cover_finish(&cover);
443  xmap->n_in = tedr.resCount;
444  }
445 
446  // exchange pos_ext of lists where additional indices need to be removed
447  struct Xt_pos_ext *pos_updates
449  num_src_intersections, src_com, num_dst_intersections, dst_com, xmap->msg,
450  src_removals_per_intersection,
451  (const int (*)[2])dst_removals_per_intersection, xmap->tag_offset, comm);
452 
453  remap_dst_intersections(num_dst_intersections, dst_com, dst_idxlist,
454  xmap->n_in, xmap->msg,
455  (const int (*)[2])dst_removals_per_intersection);
456 
457  src_removals_per_intersection =
458  xrealloc(src_removals_per_intersection, (size_t)num_src_intersections
459  * sizeof(*src_removals_per_intersection));
460 
461  struct tes_result tesr
463  num_src_intersections, src_com, src_idxlist, xmap->msg+xmap->n_in,
464  (const int (*)[2])src_removals_per_intersection, pos_updates);
465  xmap->n_out = tesr.resCount;
466  xmap->max_src_pos = tesr.max_pos;
467  free(src_removals_per_intersection);
468  free(pos_updates);
469 }
470 
473 };
474 
475 static struct Xt_pos_ext_overlap
476 Xt_get_pos_ext_overlap(struct Xt_pos_ext a, struct Xt_pos_ext b)
477 {
478  /* == 0 if a.size >= 0 ; == ~0 if a.size < 0 */
479  int aSizeMaskNeg = isign_mask(a.size),
480  /* compute start and end indices of ranges */
481  a_s = a.start + (aSizeMaskNeg & (a.size + 1)),
482  a_e = a.start + (~aSizeMaskNeg & (a.size - 1)),
483  bSizeMaskNeg = isign_mask(b.size),
484  b_s = b.start + (bSizeMaskNeg & (b.size + 1)),
485  b_e = b.start + (~bSizeMaskNeg & (b.size - 1));
486  /* does overlap exist? */
487  if ((b_s > a_e) | (a_s > b_e))
488  return (struct Xt_pos_ext_overlap){ a.size, 0, 0};
489  else {
490  /* determine length of overlap parts */
491  int lowSkipA = b_s - a_s;
492  int lowSkipB = -lowSkipA;
493  lowSkipA = (int)((unsigned)(lowSkipA + abs(lowSkipA))/2U);
494  lowSkipB = (int)((unsigned)(lowSkipB + abs(lowSkipB))/2U);
495  int overlapLen = imin(b_e - b_s - lowSkipB + 1,
496  abs(a.size) - lowSkipA);
497  int highSkipA = abs(a.size) - lowSkipA - overlapLen;
498  /* then adjust lengths to direction of overlap (from
499  * perspective of a */
500  int aSkipLen = (~aSizeMaskNeg & lowSkipA)
501  | (aSizeMaskNeg & -highSkipA),
502  aTailLen = (aSizeMaskNeg & -lowSkipA)
503  | (~aSizeMaskNeg & highSkipA);
504  return (struct Xt_pos_ext_overlap){ aSkipLen, overlapLen, aTailLen };
505  }
506 }
507 
508 
509 
510 static void
512  struct Xt_pos_ext_vec *pos_exts);
513 
514 static struct Xt_pos_ext *
516  int num_stripes,
517  const struct Xt_stripe stripes[num_stripes],
518  int *num_ext,
519  int single_match_only)
520 {
521  struct Xt_pos_ext *pos_ext;
522 #ifndef NDEBUG
523  int retval =
524 #endif
526  idxlist, num_stripes, stripes,
527  num_ext, &pos_ext, single_match_only);
528  assert(retval == 0);
529  return pos_ext;
530 }
531 
532 static struct ted_result
534  int num_intersections,
535  const struct Xt_com_list intersections[num_intersections],
536  Xt_idxlist mypart_idxlist,
537  struct exchange_ext *restrict resSets,
538  int (*restrict dst_removals_per_intersection)[2])
539 {
540  int new_num_intersections = 0;
541 
542  // we have to enforce single_match_only not only within a single
543  // intersection, but also between all intersections
544  /* ranges already covered from previous intersections, i.e. which
545  * must not be transmitted twice */
546  enum { initial_vec_size = 8 };
547  struct Xt_pos_ext_vec cover;
548  xt_cover_start(&cover, initial_vec_size);
549 
550  for (int i = 0; i < num_intersections; ++i) {
551 
552  int num_stripes, num_indices_to_remove = 0;
553  struct Xt_stripe *intersection_idxstripes;
554  xt_idxlist_get_index_stripes(intersections[i].list,
555  &intersection_idxstripes, &num_stripes);
556  int num_isect_pos_exts;
557  struct Xt_pos_ext *restrict isect_pos_exts
559  mypart_idxlist, num_stripes, intersection_idxstripes,
560  &num_isect_pos_exts, 1);
561  int isect_pos_exts_size_psum = 0;
562  int intersection_size = xt_idxlist_get_num_indices(intersections[i].list);
563  /* start with all indices from intersection as used,
564  later split ranges, if overlaps are found */
565  struct Xt_pos_ext_vec transferable
566  = (struct Xt_pos_ext_vec){ .num_pos_ext = 1,
567  .size_pos_ext = initial_vec_size,
568  .pos_ext = xrealloc(intersection_idxstripes,
569  sizeof (struct Xt_pos_ext) * initial_vec_size) };
570  intersection_idxstripes = NULL;
571  transferable.pos_ext[0]
572  = (struct Xt_pos_ext){ .start = 0, .size = intersection_size };
573  /* find overlap(s) with previously found ranges for all
574  * stripes mapped to position extents */
575  for (size_t j = 0; j < (size_t)num_isect_pos_exts; ++j) {
576  struct Xt_pos_ext isect_pos_ext = isect_pos_exts[j];
577  /* ensure isect_pos_ext is oriented with ascending positions */
578  int isign_mask_isect_pos_ext_size = isign_mask(isect_pos_ext.size);
579  isect_pos_ext.start
580  += isign_mask_isect_pos_ext_size & (isect_pos_ext.size + 1);
581  int isect_pos_ext_orig_size = isect_pos_ext.size;
582  isect_pos_ext.size = abs(isect_pos_ext.size);
583  isect_pos_exts_size_psum += isect_pos_ext.size;
584  /* keep progress as inverse of change to psum to compensate for
585  * eventual correction later */
586  int progress = -isect_pos_ext.size;
587  size_t search_start_pos = 0, insert_pos;
588  do {
589  struct Xt_pos_range query = (struct Xt_pos_range){
590  .start = isect_pos_ext.start,
591  .end = isect_pos_ext.start + isect_pos_ext.size - 1 };
592  insert_pos
593  = xt_cover_insert_or_overlap(&cover, query, true, search_start_pos);
594  if (insert_pos == SIZE_MAX)
595  goto next_isect_pos_ext;
596  struct Xt_pos_ext_overlap overlap_desc
597  = Xt_get_pos_ext_overlap(isect_pos_ext, cover.pos_ext[insert_pos]);
598  /* insert overlap into updates
599  * by ...*/
600  /* ...first inserting the skipped part into
601  * cover.pos_ext, since that is sorted
602  * and obviously precedes cover.pos_ext[insert_pos],
603  * cover.pos_ext[insert_pos] can be seemlessly extended...
604  */
605  cover.pos_ext[insert_pos].start -= overlap_desc.skip;
606  cover.pos_ext[insert_pos].size += overlap_desc.skip;
607  /* ...and optionally merged with its predecessor, if the
608  * intervening range becomes zero, ... */
609  if (insert_pos > 0
610  && (cover.pos_ext[insert_pos].start
611  == (cover.pos_ext[insert_pos - 1].start
612  + cover.pos_ext[insert_pos - 1].size)))
613  {
614  cover.pos_ext[insert_pos - 1].size
615  += cover.pos_ext[insert_pos].size;
616  memmove(cover.pos_ext + insert_pos, cover.pos_ext + insert_pos + 1,
617  (--cover.num_pos_ext - insert_pos)
618  * sizeof (*cover.pos_ext));
619  --insert_pos;
620  }
621  progress = (~isign_mask_isect_pos_ext_size
622  & (progress + overlap_desc.skip))
623  | (isign_mask_isect_pos_ext_size
624  & (isect_pos_ext_orig_size + overlap_desc.tail));
625  /* ... then splitting transferable accordingly, ... */
626  num_indices_to_remove += overlap_desc.overlap;
628  .start = isect_pos_exts_size_psum + progress,
629  .size = overlap_desc.overlap }, &transferable);
630  progress += overlap_desc.overlap;
631  /* ... lastly the search can continue with the tail ... */
632  isect_pos_ext.start += overlap_desc.skip + overlap_desc.overlap;
633  /* ... if there is any */
634  isect_pos_ext.size = overlap_desc.tail;
635  search_start_pos = ++insert_pos;
636  } while ((isect_pos_ext.size != 0)
637  & (search_start_pos != cover.num_pos_ext));
638  if (isect_pos_ext.size)
639  /* already at end of list -> append ... */
640  xt_cover_range_append(&cover, isect_pos_ext);
641  /* ... and start the next intersection range */
642  next_isect_pos_ext:
643  ;
644  }
645 
646  if (intersection_size > num_indices_to_remove) {
647  resSets[new_num_intersections].transfer_pos_ext
648  = xrealloc(transferable.pos_ext, sizeof (struct Xt_pos_ext)
649  * transferable.num_pos_ext);
650  /* start with empty cache of positions to transfer */
651  resSets[new_num_intersections].transfer_pos = NULL;
652  resSets[new_num_intersections].num_transfer_pos
653  = intersection_size - num_indices_to_remove;
654  resSets[new_num_intersections].num_transfer_pos_ext
655  = (int)transferable.num_pos_ext;
656  resSets[new_num_intersections].rank = intersections[i].rank;
657  ++new_num_intersections;
658  } else
659  free(transferable.pos_ext);
660  dst_removals_per_intersection[i][0] = num_indices_to_remove;
661  dst_removals_per_intersection[i][1]
662  = ((num_indices_to_remove == intersection_size)
663  | (num_indices_to_remove == 0))?0:(int)transferable.num_pos_ext;
664  free(isect_pos_exts);
665  }
666  /* since cover is a struct, at least pgcc 11-13 cannot compile this with a
667  * compound literal or initialize r directly */
668 #if defined __PGI && __PGIC__ <= 13
669  struct ted_result r;
670  r.cover = cover;
671  r.resCount = new_num_intersections;
672  return r;
673 #else
674  return (struct ted_result){
675  .cover = cover,
676  .resCount = new_num_intersections };
677 #endif
678 }
679 
680 static void
682  struct Xt_pos_ext_vec *pos_exts)
683 {
684  struct Xt_pos_ext *restrict pos_exts_ = pos_exts->pos_ext;
685  size_t num_pos_exts_ = pos_exts->num_pos_ext;
686  size_t i = num_pos_exts_;
687  while (pos_exts_[--i].start > pos_ext.start)
688  ;
689  int db_skip = pos_ext.start - pos_exts_[i].start;
690  if ((!db_skip) & (pos_ext.size == pos_exts_[i].size))
691  {
692  /* delete fully overlapped transfer part */
693  memmove(pos_exts_ + i, pos_exts_ + i + 1,
694  sizeof (*pos_exts_) * (num_pos_exts_ - i - 1));
695  pos_exts->num_pos_ext = --num_pos_exts_;
696  }
697  else if (db_skip + pos_ext.size == pos_exts_[i].size)
698  {
699  /* pos_ext overlaps end of pos_exts_[i] */
700  pos_exts_[i].size -= pos_ext.size;
701  }
702  else if (db_skip == 0)
703  {
704  /* pos_ext overlaps start of pos_exts_[i] */
705  pos_exts_[i].start = pos_ext.start + pos_ext.size;
706  pos_exts_[i].size -= pos_ext.size;
707  }
708  else
709  {
710  struct Xt_pos_ext orig = pos_exts_[i];
711  ENSURE_ARRAY_SIZE(pos_exts->pos_ext, pos_exts->size_pos_ext,
712  num_pos_exts_ + 1);
713  pos_exts_ = pos_exts->pos_ext;
714  memmove(pos_exts_ + i + 1, pos_exts_ + i,
715  (num_pos_exts_ - i) * sizeof (*pos_exts_));
716  pos_exts_[i] = (struct Xt_pos_ext){.start = orig.start,
717  .size = db_skip };
718  pos_exts_[i + 1] = (struct Xt_pos_ext){
719  .start = pos_ext.start + pos_ext.size,
720  .size = orig.size - db_skip - pos_ext.size };
721  pos_exts->num_pos_ext = ++num_pos_exts_;
722  }
723 }
724 
725 static struct Xt_pos_ext *
727  int num_src_intersections,
728  const struct Xt_com_list src_com[num_src_intersections],
729  int num_dst_intersections,
730  const struct Xt_com_list dst_com[num_dst_intersections],
731  struct exchange_ext dst_ext[num_dst_intersections],
732  int (*restrict src_removals_per_intersection)[2],
733  const int (*restrict dst_removals_per_intersection)[2],
734  int tag_offset,
735  MPI_Comm comm)
736 {
737  MPI_Request * requests
738  = xmalloc((size_t)(num_src_intersections + 2 * num_dst_intersections) *
739  sizeof(*requests));
740  MPI_Request *restrict send_header_requests = requests,
741  *restrict recv_requests = requests + num_dst_intersections,
742  *restrict send_data_requests = recv_requests + num_src_intersections;
743 
744  // set up receives for indices that need to be removed from the send messages
745  for (int i = 0; i < num_src_intersections; ++i)
746  xt_mpi_call(MPI_Irecv(
747  src_removals_per_intersection[i], 2, MPI_INT, src_com[i].rank,
749  comm, recv_requests + i), comm);
750 
751  /* send rebuilt pos_ext vectors that needed to be modified on the
752  * target side due to duplicated receives
753  */
754  unsigned num_active_dst = 0, num_dst_changes = 0;
755  for (int i = 0; i < num_dst_intersections; ++i) {
756  xt_mpi_call(MPI_Isend(
757  CAST_MPI_SEND_BUF(dst_removals_per_intersection[i]),
758  2, MPI_INT, dst_com[i].rank,
760  comm, send_header_requests + i), comm);
761 
762  if (dst_removals_per_intersection[i][1] > 0) {
763 
764  assert(dst_removals_per_intersection[i][1]
765  == dst_ext[num_active_dst].num_transfer_pos_ext
766  && dst_com[i].rank == dst_ext[num_active_dst].rank);
767  xt_mpi_call(MPI_Isend(
768  dst_ext[num_active_dst].transfer_pos_ext,
769  dst_removals_per_intersection[i][1],
770  MPI_2INT, dst_com[i].rank,
772  comm, send_data_requests + num_dst_changes),
773  comm);
774  ++num_dst_changes;
775  }
776  num_active_dst += (unsigned)((dst_removals_per_intersection[i][0] == 0)
777  | (dst_removals_per_intersection[i][1] != 0));
778  }
779 
780  // wait for the receiving of headers to complete
781  xt_mpi_call(MPI_Waitall(num_src_intersections + num_dst_intersections,
782  requests, MPI_STATUSES_IGNORE), comm);
783 
784  size_t total_num_pos_ext_to_recv = 0;
785 
786  for (size_t i = 0; i < (size_t)num_src_intersections; ++i)
787  total_num_pos_ext_to_recv += (size_t)src_removals_per_intersection[i][1];
788 
789  struct Xt_pos_ext *src_updated_pos_ext;
790  unsigned num_src_changes = 0;
791  if (total_num_pos_ext_to_recv > 0) {
792 
793  src_updated_pos_ext
794  = xmalloc(total_num_pos_ext_to_recv * sizeof(*src_updated_pos_ext));
795 
796  /* set up receive for pos_ext that need to be modified because
797  * indices needed to be removed from the intersection */
798  size_t offset = 0;
799  for (int i = 0; i < num_src_intersections; ++i)
800  if (src_removals_per_intersection[i][1] > 0) {
801  ++num_src_changes;
802  xt_mpi_call(MPI_Irecv(
803  src_updated_pos_ext + offset,
804  src_removals_per_intersection[i][1], MPI_2INT,
805  src_com[i].rank,
807  comm, send_data_requests - num_src_changes), comm);
808 
809  offset += (size_t)src_removals_per_intersection[i][1];
810  }
811  } else
812  src_updated_pos_ext = NULL;
813 
814  // wait until all communication is completed
815  xt_mpi_call(MPI_Waitall((int)num_src_changes + (int)num_dst_changes,
816  send_data_requests - num_src_changes,
817  MPI_STATUSES_IGNORE), comm);
818 
819  free(requests);
820  return src_updated_pos_ext;
821 }
822 
823 static void
824 remap_intersection(Xt_idxlist mypart_idxlist,
825  Xt_idxlist intersection,
826  size_t num_pos_updates,
827  const struct Xt_pos_ext pos_updates[num_pos_updates],
828  struct exchange_ext *resSet,
829  int single_match_only);
830 
831 static void
833  int num_dst_intersections,
834  const struct Xt_com_list intersections[num_dst_intersections],
835  Xt_idxlist mypart_idxlist,
836  int resCount,
837  struct exchange_ext resSets[resCount],
838  const int (*removals_per_intersection)[2])
839 {
840  size_t resIdx = 0;
841  for (size_t i = 0; i < (size_t)num_dst_intersections; ++i)
842  {
843  int intersection_size
844  = xt_idxlist_get_num_indices(intersections[i].list);
845 
846  int num_indices_to_remove = removals_per_intersection[i][0];
847 
848  if (num_indices_to_remove != intersection_size) {} else
849  /* intersection is made redundant */
850  continue;
851 
852  struct Xt_pos_ext *pos_updates = resSets[resIdx].transfer_pos_ext;
853  remap_intersection(mypart_idxlist, intersections[i].list,
854  (size_t)removals_per_intersection[i][1],
855  pos_updates, resSets + resIdx, 1);
856  free(pos_updates);
857  ++resIdx;
858  }
859  assert(resIdx == (size_t)resCount);
860 }
861 
862 static inline int
863 pos_ext_find_max_pos(int num_pos_ext,
864  const struct Xt_pos_ext *restrict pos_ext)
865 {
866  int max_pos = -1;
867  for (size_t i = 0; i < (size_t)num_pos_ext; ++i) {
868  int start = pos_ext[i].start,
869  size = pos_ext[i].size,
870  max = size > 0 ? start + size - 1 : start;
871  if (max > max_pos) max_pos = max;
872  }
873  return max_pos;
874 }
875 
876 /* compute updated positions for send direction */
877 static struct tes_result
879  int num_intersections,
880  const struct Xt_com_list intersections[num_intersections],
881  Xt_idxlist mypart_idxlist,
882  struct exchange_ext *resSets,
883  const int (*restrict removals_per_intersection)[2],
884  const struct Xt_pos_ext *pos_updates)
885 {
886  /* count total number of intersections that results */
887  int new_num_intersections = 0;
888  /* indexes into pos_updates */
889  size_t intersection_pos_ext = 0;
890 
891  int max_pos = -1;
892  for (int i = 0; i < num_intersections; ++i) {
893 
894  int intersection_size
895  = xt_idxlist_get_num_indices(intersections[i].list);
896 
897  int num_indices_to_remove = removals_per_intersection[i][0];
898 
899  /* intersection might be redundant */
900  if (num_indices_to_remove != intersection_size) {
901 
902  remap_intersection(mypart_idxlist, intersections[i].list,
903  (size_t)removals_per_intersection[i][1],
904  pos_updates + intersection_pos_ext,
905  resSets + new_num_intersections, 0);
906 
907  int max = pos_ext_find_max_pos(
908  resSets[new_num_intersections].num_transfer_pos_ext,
909  resSets[new_num_intersections].transfer_pos_ext);
910  if (max > max_pos) max_pos = max;
911  /* evaluate cache lazily */
912  resSets[new_num_intersections].transfer_pos = NULL;
913  resSets[new_num_intersections].num_transfer_pos
914  = intersection_size - num_indices_to_remove;
915  resSets[new_num_intersections].rank = intersections[i].rank;
916  new_num_intersections++;
917  intersection_pos_ext += (size_t)removals_per_intersection[i][1];
918  }
919  }
920 
921  return (struct tes_result) {
922  .resCount = new_num_intersections,
923  .max_pos = max_pos,
924  };
925 }
926 
927 static struct Xt_stripe *
928 refine_stripes(int *num_stripes_,
929  struct Xt_stripe *restrict intersection_idxstripes,
930  size_t num_pos_updates,
931  const struct Xt_pos_ext *restrict pos_updates)
932 {
933  /* trim intersection_idxstripes to those actually used */
934  size_t num_refined_intersection_idxstripes = 0,
935  size_refined_intersection_idxstripes = num_pos_updates;
936  struct Xt_stripe *restrict refined_intersection_idxstripes
937  = xmalloc(size_refined_intersection_idxstripes
938  * sizeof (*refined_intersection_idxstripes));
939  size_t i_stripe = 0;
940  int nstrides_psum = 0;
941  for (size_t i_pos_ext = 0; i_pos_ext < num_pos_updates; ++i_pos_ext)
942  {
943  int pos = pos_updates[i_pos_ext].start;
944  int size = pos_updates[i_pos_ext].size;
945  while (nstrides_psum + intersection_idxstripes[i_stripe].nstrides <= pos)
946  {
947  nstrides_psum += intersection_idxstripes[i_stripe].nstrides;
948  ++i_stripe;
949  }
950  do {
951  int instripe_pos = pos - nstrides_psum;
952  ENSURE_ARRAY_SIZE(refined_intersection_idxstripes,
953  size_refined_intersection_idxstripes,
954  num_refined_intersection_idxstripes + 1);
955  struct Xt_stripe cur_stripe = intersection_idxstripes[i_stripe];
956  int cur_stripe_nstrides = cur_stripe.nstrides;
957  int overlap = imin(cur_stripe_nstrides - instripe_pos, size);
958  cur_stripe.start
959  = (Xt_int)(cur_stripe.start
960  + (Xt_int)instripe_pos * cur_stripe.stride);
961  cur_stripe.nstrides = overlap;
962  refined_intersection_idxstripes[num_refined_intersection_idxstripes]
963  = cur_stripe;
964  ++num_refined_intersection_idxstripes;
965  i_stripe += (instripe_pos + overlap == cur_stripe_nstrides);
966  nstrides_psum += (instripe_pos + overlap == cur_stripe_nstrides)
967  ? cur_stripe_nstrides : 0;
968  pos += overlap;
969  size -= overlap;
970  } while (size);
971  }
972  free(intersection_idxstripes);
973  *num_stripes_ = (int)num_refined_intersection_idxstripes;
974  return refined_intersection_idxstripes;
975 }
976 
977 
978 /* match index stripes of intersection to corresponding positions in
979  * part list, optionally updating the stripes
980  * @param num_pos_updates number of position extents describing the
981  * subset of positions from intersections to use
982  * @param pos_updates list of position extents to use from \a intersection
983  */
984 static void
986  Xt_idxlist intersection,
987  size_t num_pos_updates,
988  const struct Xt_pos_ext pos_updates[num_pos_updates],
989  struct exchange_ext *resSet,
990  int single_match_only)
991 {
992  struct Xt_stripe *intersection_idxstripes;
993  int num_stripes;
994  xt_idxlist_get_index_stripes(intersection,
995  &intersection_idxstripes,
996  &num_stripes);
997  if (num_pos_updates)
998  intersection_idxstripes
999  = refine_stripes(&num_stripes, intersection_idxstripes,
1000  num_pos_updates, pos_updates);
1001 
1002  /* match back intersection_idxstripes to positions in mypart */
1003  resSet->transfer_pos_ext = NULL;
1004 #ifndef NDEBUG
1005  int retval =
1006 #endif
1008  mypart_idxlist, num_stripes, intersection_idxstripes,
1009  &resSet->num_transfer_pos_ext,
1010  &resSet->transfer_pos_ext, single_match_only);
1011  assert(retval == 0);
1012  free(intersection_idxstripes);
1013 }
1014 
1016  int n_out, const struct exchange_ext *restrict out_msg,
1017  int n_in, const struct exchange_ext *restrict in_msg,
1018  struct exchange_ext *restrict remote_out_msg,
1019  int tag_offset, MPI_Comm comm) {
1020 
1021  MPI_Request * requests
1022  = xmalloc((size_t)(n_in + 2 * n_out) * sizeof(*requests));
1023  MPI_Request *send_header_requests = requests,
1024  *recv_requests = requests + n_out,
1025  *send_data_requests = recv_requests + n_in;
1026 
1027  // set up receives for number of transfer_pos_ext per remote
1028  // for (int i = 0; i < n_in; ++i)
1029  for (int i = 0; i < n_in; ++i)
1030  xt_mpi_call(MPI_Irecv(
1031  &(remote_out_msg[i].num_transfer_pos_ext), 1, MPI_INT,
1032  in_msg[i].rank,
1034  comm, recv_requests + i), comm);
1035 
1036  // send number of transfer_pos_ext
1037  for (int i = 0; i < n_out; ++i) {
1038  xt_mpi_call(MPI_Isend(
1039  CAST_MPI_SEND_BUF(&(out_msg[i].num_transfer_pos_ext)),
1040  1, MPI_INT, out_msg[i].rank,
1042  comm, send_header_requests + i), comm);
1043 
1044  xt_mpi_call(MPI_Isend(
1045  CAST_MPI_SEND_BUF(out_msg[i].transfer_pos_ext),
1046  out_msg[i].num_transfer_pos_ext,
1047  MPI_2INT, out_msg[i].rank,
1049  comm, send_data_requests + i),
1050  comm);
1051  }
1052 
1053  // wait for the receiving of headers to complete
1054  xt_mpi_call(
1055  MPI_Waitall(n_out + n_in, send_header_requests, MPI_STATUSES_IGNORE), comm);
1056 
1057  size_t total_num_pos_ext_to_recv = 0;
1058 
1059  for (size_t i = 0; i < (size_t)n_in; ++i)
1060  total_num_pos_ext_to_recv +=
1061  (size_t)(remote_out_msg[i].num_transfer_pos_ext);
1062 
1063  struct Xt_pos_ext *transfer_pos_ext_buffer;
1064  if (total_num_pos_ext_to_recv > 0) {
1065 
1066  transfer_pos_ext_buffer
1067  = xmalloc(total_num_pos_ext_to_recv * sizeof(*transfer_pos_ext_buffer));
1068 
1069  // set up receive for transfer_pos_ext
1070  struct Xt_pos_ext *curr_transfer_pos_ext = transfer_pos_ext_buffer;
1071  for (int i = 0; i < n_in; ++i) {
1072  xt_mpi_call(MPI_Irecv(
1073  curr_transfer_pos_ext,
1074  remote_out_msg[i].num_transfer_pos_ext, MPI_2INT,
1075  in_msg[i].rank,
1077  comm, recv_requests + i), comm);
1078 
1079  remote_out_msg[i].transfer_pos_ext = curr_transfer_pos_ext;
1080  curr_transfer_pos_ext += remote_out_msg[i].num_transfer_pos_ext;
1081  }
1082  } else
1083  transfer_pos_ext_buffer = NULL;
1084 
1085  // wait until all communication is completed
1086  xt_mpi_call(
1087  MPI_Waitall(n_in + n_out, recv_requests, MPI_STATUSES_IGNORE), comm);
1088 
1089  free(requests);
1090  return transfer_pos_ext_buffer;
1091 }
1092 
1093 static void
1095 
1096  int buffer_size = 0;
1097  for (int i = 0; i < n; ++i)
1098  if (msg[i].transfer_pos == NULL && msg[i].num_transfer_pos > buffer_size)
1099  buffer_size = msg[i].num_transfer_pos;
1100 
1101  int *transfer_pos_buffer
1102  = buffer_size > 0
1103  ? xmalloc((size_t)buffer_size * sizeof(*transfer_pos_buffer))
1104  : NULL;
1105 
1106  for (int i = 0; i < n; ++i) {
1107 
1108  // get the positions array
1109  int *restrict transfer_pos;
1110  size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1111  if (msg[i].transfer_pos != NULL) {
1112  transfer_pos = msg[i].transfer_pos;
1113  } else {
1114  transfer_pos = transfer_pos_buffer;
1115  generate_pos(
1116  (size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1117  num_transfer_pos, transfer_pos);
1118  }
1119 
1120  // sort the positions array
1121  xt_quicksort_int(transfer_pos, num_transfer_pos);
1122 
1123  // convert the positions array to position extents array
1124  size_t num_transfer_pos_ext = count_pos_ext(num_transfer_pos, transfer_pos);
1125  struct Xt_pos_ext * transfer_pos_ext;
1126  if (num_transfer_pos_ext != (size_t)(msg[i].num_transfer_pos_ext)) {
1127  msg[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
1128  transfer_pos_ext =
1129  (msg[i].transfer_pos_ext =
1130  xrealloc(msg[i].transfer_pos_ext,
1131  num_transfer_pos_ext * sizeof(*transfer_pos_ext)));
1132  } else {
1133  transfer_pos_ext = msg[i].transfer_pos_ext;
1134  }
1136  num_transfer_pos, transfer_pos, num_transfer_pos_ext, transfer_pos_ext);
1137  }
1138 
1139  if (buffer_size > 0) free(transfer_pos_buffer);
1140 }
1141 
1142 static void
1144  struct exchange_ext *msg,
1145  struct exchange_ext *permutation_msg) {
1146 
1147  size_t buffer_size = 0;
1148  for (int i = 0; i < n; ++i) {
1149  assert(msg[i].transfer_pos == NULL
1150  && permutation_msg[i].transfer_pos == NULL);
1151  size_t curr_buffer_size
1152  = (size_t)(msg[i].num_transfer_pos)
1153  + (size_t)(permutation_msg[i].num_transfer_pos);
1154  if (curr_buffer_size > buffer_size) buffer_size = curr_buffer_size;
1155  }
1156 
1157  int *transfer_pos_buffer
1158  = buffer_size > 0
1159  ? xmalloc(buffer_size * sizeof(*transfer_pos_buffer))
1160  : NULL;
1161 
1162  for (int i = 0; i < n; ++i) {
1163 
1164  // get the positions array
1165  size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1166 
1167  int *restrict transfer_pos = transfer_pos_buffer;
1168  generate_pos(
1169  (size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1170  num_transfer_pos, transfer_pos);
1171 
1172  // get the permutation array
1173  int *permutation = transfer_pos_buffer + num_transfer_pos;
1174  generate_pos(
1175  (size_t)(permutation_msg[i].num_transfer_pos_ext),
1176  permutation_msg[i].transfer_pos_ext, num_transfer_pos, permutation);
1177 
1178  // sort the positions array
1179  xt_quicksort_int_permutation(transfer_pos, num_transfer_pos, permutation);
1180 
1181  // convert the permutation array to position extents array
1182  size_t num_transfer_pos_ext = count_pos_ext(num_transfer_pos, permutation);
1183  struct Xt_pos_ext * transfer_pos_ext;
1184  if (num_transfer_pos_ext !=
1185  (size_t)(permutation_msg[i].num_transfer_pos_ext)) {
1186  permutation_msg[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
1187  permutation_msg[i].transfer_pos_ext
1188  = transfer_pos_ext
1189  = xrealloc(permutation_msg[i].transfer_pos_ext,
1190  num_transfer_pos_ext * sizeof(*transfer_pos_ext));
1191  } else {
1192  transfer_pos_ext = permutation_msg[i].transfer_pos_ext;
1193  }
1195  num_transfer_pos, permutation, num_transfer_pos_ext, transfer_pos_ext);
1196  }
1197 
1198  if (buffer_size > 0) free(transfer_pos_buffer);
1199 }
1200 
1202  int n_out, int n_in, struct exchange_ext * out_msg,
1203  struct exchange_ext * in_msg, int tag_offset, MPI_Comm comm) {
1204 
1205  int comm_size;
1206  xt_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1207 
1208  struct exchange_ext *restrict remote_out_msg =
1209  xmalloc((size_t)n_in * sizeof(*remote_out_msg));
1210 
1211  for (int i = 0; i < n_in; ++i) {
1212  remote_out_msg[i].rank = in_msg[i].rank;
1213  remote_out_msg[i].num_transfer_pos = in_msg[i].num_transfer_pos;
1214  remote_out_msg[i].transfer_pos = NULL;
1215  }
1216 
1217  // exchange transfer_pos_ext of out messages to respective receivers
1218  struct Xt_pos_ext *transfer_pos_ext_buffer =
1220  n_out, out_msg, n_in, in_msg, remote_out_msg, tag_offset, comm);
1221 
1222  // sort the transfer positions in all out messages
1223  sort_transfer_pos_ext(n_out, out_msg);
1224 
1225  // sort the transfer positions in all in messages based on the remote out
1226  // messages
1227  sort_transfer_pos_ext_permutation(n_in, remote_out_msg, in_msg);
1228 
1229  free(transfer_pos_ext_buffer);
1230  free(remote_out_msg);
1231 }
1232 
1233 static Xt_xmap
1235 
1236  Xt_xmap_intersection_ext xmap_intersection_ext_new =
1238 
1239  int n_out = xmap_intersection_ext_new->n_out;
1240  int n_in = xmap_intersection_ext_new->n_in;
1241  struct exchange_ext *in_msg = xmap_intersection_ext_new->msg,
1242  *out_msg = in_msg + n_in;
1243  MPI_Comm comm = xmap_intersection_ext_new->comm;
1244  int tag_offset = xmap_intersection_ext_new->tag_offset;
1245 
1246  switch ((int)type) {
1247  case (XT_REORDER_NONE):
1248  break;
1249  case (XT_REORDER_SEND_UP):
1250  reorder_transfer_pos_ext(n_out, n_in, out_msg, in_msg, tag_offset, comm);
1251  break;
1252  case (XT_REORDER_RECV_UP):
1253  reorder_transfer_pos_ext(n_in, n_out, in_msg, out_msg, tag_offset, comm);
1254  break;
1255  default:
1256  Xt_abort(comm, "ERROR(xmap_intersection_ext_reorder):invalid reorder "
1257  "type", __FILE__, __LINE__);
1258  };
1259 
1260  return (Xt_xmap)xmap_intersection_ext_new;
1261 }
1262 
1263 struct up_state
1264 {
1266  const int *new_pos;
1267 };
1268 
1269 static int
1270 update_positions(size_t num_orig_pos_ext,
1271  size_t *num_pos_ext,
1272  struct Xt_pos_ext **pos_ext,
1273  const struct Xt_pos_ext *orig_pos_ext,
1274  size_t num_orig_pos, const int *orig_pos,
1275  void *state_)
1276 {
1277  (void)num_orig_pos_ext;
1278  struct up_state *state = state_;
1279  int *pos_buffer = state->pos_buffer;
1280  const int *restrict new_pos = state->new_pos;
1281  const int *pos;
1282  if (orig_pos)
1283  pos = orig_pos;
1284  else {
1285  generate_pos(num_orig_pos_ext, orig_pos_ext, num_orig_pos, pos_buffer);
1286  pos = pos_buffer;
1287  }
1288  // update positions
1289  int max_pos = 0;
1290  for (size_t j = 0; j < num_orig_pos; ++j) {
1291  int np = new_pos[pos[j]];
1292  pos_buffer[j] = np;
1293  if (np > max_pos)
1294  max_pos = np;
1295  }
1296 
1297  // convert the array of substituted positions into position extents array
1298  size_t num_pos_ext_ = *num_pos_ext = count_pos_ext(num_orig_pos, pos_buffer);
1299  struct Xt_pos_ext *pos_ext_
1300  = *pos_ext = xmalloc(num_pos_ext_ * sizeof (*pos_ext_));
1301  generate_pos_ext(num_orig_pos, pos_buffer, num_pos_ext_, pos_ext_);
1302  return max_pos;
1303 }
1304 
1305 static Xt_xmap
1307  const int *src_positions,
1308  const int *dst_positions) {
1309 
1310  Xt_xmap_intersection_ext xmie_orig = xmie(xmap);
1311  size_t max_num_pos = 0;
1312  size_t n = (size_t)xmie_orig->n_in + (size_t)xmie_orig->n_out;
1313  const struct exchange_ext *restrict msg = xmie_orig->msg;
1314  for (size_t i = 0; i < n; ++i)
1315  if ((size_t)msg[i].num_transfer_pos > max_num_pos)
1316  max_num_pos = (size_t)msg[i].num_transfer_pos;
1317  int *pos_buffer
1318  = max_num_pos > 0
1319  ? xmalloc((size_t)max_num_pos * sizeof(*pos_buffer))
1320  : NULL;
1321  struct up_state ups_in = { pos_buffer, dst_positions },
1322  ups_out = { pos_buffer, src_positions };
1323  Xt_xmap xmap_new
1324  = xmap_intersection_ext_copy_(xmap, 1,
1325  update_positions, &ups_in,
1326  update_positions, &ups_out);
1327  free(pos_buffer);
1328  return xmap_new;
1329 }
1330 
1331 struct spread_state
1332 {
1333  int num_repetitions;
1334  const int *displacements;
1335 };
1336 
1337 static int
1338 pos_ext_copy_spread(size_t num_orig_pos_ext,
1339  size_t *num_pos_ext,
1340  struct Xt_pos_ext **pos_ext,
1341  const struct Xt_pos_ext *orig_pos_ext,
1342  size_t num_orig_pos, const int *orig_pos,
1343  void *state)
1344 {
1345  (void)num_orig_pos; (void)orig_pos;
1346  struct spread_state *sp = state;
1347  int num_repetitions = sp->num_repetitions;
1348  const int *restrict displacements = sp->displacements;
1349  size_t new_num_pos_ext = num_orig_pos_ext * (size_t)num_repetitions;
1350  size_t size_pos_ext = new_num_pos_ext * sizeof (**pos_ext);
1351  struct Xt_pos_ext *pos_ext_ = *pos_ext = xmalloc(size_pos_ext);
1352  int max_pos = -1;
1353  for (int i = 0; i < num_repetitions; ++i) {
1354  struct Xt_pos_ext *restrict curr_pos_ext =
1355  pos_ext_ + (size_t)i * num_orig_pos_ext;
1356  const int curr_displacement = displacements[i];
1357  for (size_t j = 0; j < num_orig_pos_ext; ++j) {
1358  int start = orig_pos_ext[j].start + curr_displacement,
1359  size = orig_pos_ext[j].size,
1360  max = size > 0 ? start + size - 1 : start;
1361  if (max > max_pos)
1362  max_pos = max;
1363  curr_pos_ext[j] = (struct Xt_pos_ext){ .start = start, .size = size };
1364  }
1365  }
1366  *num_pos_ext = new_num_pos_ext;
1367  return max_pos;
1368 }
1369 
1370 
1371 static Xt_xmap
1372 xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions,
1373  const int src_displacements[num_repetitions],
1374  const int dst_displacements[num_repetitions]) {
1375 
1376 
1377  return xmap_intersection_ext_copy_(xmap, num_repetitions,
1379  &(struct spread_state){
1380  .num_repetitions = num_repetitions,
1381  .displacements = src_displacements },
1383  &(struct spread_state){
1384  .num_repetitions = num_repetitions,
1385  .displacements = dst_displacements });
1386 }
1387 
1388 
1389 /* iterator operations */
1390 
1393 static int const *
1395 static int
1397 static const struct Xt_pos_ext *
1399 static int
1402 
1403 static const struct Xt_xmap_iter_vtable
1410  .get_num_transfer_pos_ext
1413 
1415 
1417 
1419 
1422 };
1423 
1425 
1426  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
1427 
1428  if (xmap_intersection_ext->n_in == 0)
1429  return NULL;
1430 
1431  Xt_xmap_iter_intersection_ext iter = xmalloc(sizeof (*iter));
1432 
1434  iter->msg = xmap_intersection_ext->msg;
1435  iter->msgs_left = xmap_intersection_ext->n_in - 1;
1436 
1437  return (Xt_xmap_iter)iter;
1438 }
1439 
1441 
1442  Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
1443 
1444  if (xmap_intersection_ext->n_out == 0)
1445  return NULL;
1446 
1447  Xt_xmap_iter_intersection_ext iter = xmalloc(sizeof (*iter));
1448 
1450  iter->msg = xmap_intersection_ext->msg + xmap_intersection_ext->n_in;
1451  iter->msgs_left = xmap_intersection_ext->n_out - 1;
1452 
1453  return (Xt_xmap_iter)iter;
1454 }
1455 
1456 static inline Xt_xmap_iter_intersection_ext
1457 xmiei(void *iter)
1458 {
1459  return (Xt_xmap_iter_intersection_ext)iter;
1460 }
1461 
1463 
1464  Xt_xmap_iter_intersection_ext iter_intersection = xmiei(iter);
1465 
1466  if (iter_intersection == NULL || iter_intersection->msgs_left == 0)
1467  return 0;
1468 
1469  iter_intersection->msg++;
1470  iter_intersection->msgs_left--;
1471 
1472  return 1;
1473 }
1474 
1476 
1477  assert(iter != NULL);
1478  return xmiei(iter)->msg->rank;
1479 }
1480 
1481 static int const *
1483 
1484  assert(iter != NULL);
1485  struct exchange_ext *restrict msg = xmiei(iter)->msg;
1486  if ((!msg->num_transfer_pos) | (msg->transfer_pos != NULL)) { } else {
1487  size_t num_transfer_pos = (size_t)msg->num_transfer_pos;
1488  int * transfer_pos =
1489  (msg->transfer_pos = xmalloc(num_transfer_pos * sizeof(*transfer_pos)));
1490  generate_pos(
1491  (size_t)msg->num_transfer_pos_ext, msg->transfer_pos_ext,
1493  }
1494  return msg->transfer_pos;
1495 }
1496 
1497 static int
1499  assert(iter != NULL);
1500  return xmiei(iter)->msg->num_transfer_pos;
1501 }
1502 
1503 static const struct Xt_pos_ext *
1505  assert(iter != NULL);
1506  return xmiei(iter)->msg->transfer_pos_ext;
1507 }
1508 
1509 static int
1511  assert(iter != NULL);
1512  return xmiei(iter)->msg->num_transfer_pos_ext;
1513 }
1514 
1516 
1517  free(iter);
1518 }
1519 
1520 /*
1521  * Local Variables:
1522  * c-basic-offset: 2
1523  * coding: utf-8
1524  * indent-tabs-mode: nil
1525  * show-trailing-whitespace: t
1526  * require-trailing-newline: t
1527  * End:
1528  */
int MPI_Comm
Definition: core.h:64
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
integer, parameter, public sp
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition: ppm_xfuncs.h:71
#define xmalloc(size)
Definition: ppm_xfuncs.h:70
quicksort declaration
void xt_quicksort_int(int a[], size_t n)
void xt_quicksort_int_permutation(int a[], size_t n, int permutation[])
struct Xt_pos_ext * pos_ext
Definition: xt_cover.h:60
size_t num_pos_ext
Definition: xt_cover.h:59
size_t size_pos_ext
Definition: xt_cover.h:59
int size
Definition: xt_core.h:93
int start
Definition: xt_core.h:93
Xt_int stride
Definition: xt_stripe.h:56
int nstrides
Definition: xt_stripe.h:57
Xt_int start
Definition: xt_stripe.h:55
const struct Xt_xmap_vtable * vtable
const struct Xt_xmap_iter_vtable * vtable
int(* next)(Xt_xmap_iter iter)
MPI_Comm(* get_communicator)(Xt_xmap)
struct Xt_pos_ext * transfer_pos_ext
const int *restrict displacements
struct Xt_pos_ext_vec cover
static int isign_mask(int x)
static int imin(int a, int b)
int xt_initialized(void)
Definition: xt_init.c:107
XT_INT Xt_int
Definition: xt_core.h:68
void xt_cover_range_append(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_ext range)
Definition: xt_cover.c:128
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
Definition: xt_cover.c:69
bool xt_idxlist_pos_ext_is_full_cover(Xt_idxlist idxlist, struct Xt_pos_ext_vec cover)
Definition: xt_cover.c:75
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, bool forward, size_t search_start_pos)
Definition: xt_cover.c:148
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Definition: xt_cover.c:60
index list declaration
int xt_idxlist_get_num_indices(Xt_idxlist idxlist)
Definition: xt_idxlist.c:98
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Definition: xt_idxlist.c:118
int xt_idxlist_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only)
Definition: xt_idxlist.c:237
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
Definition: xt_mpi.c:813
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
Definition: xt_mpi.c:864
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition: xt_mpi.h:68
@ xt_mpi_tag_xmap_intersection_data_exchange
@ xt_mpi_tag_xmap_intersection_header_exchange
exchange map declarations
xt_reorder_type
Definition: xt_xmap.h:239
@ XT_REORDER_RECV_UP
optimise data access on receiver side
Definition: xt_xmap.h:242
@ XT_REORDER_NONE
no reordering
Definition: xt_xmap.h:240
@ XT_REORDER_SEND_UP
optimise data access on sender side
Definition: xt_xmap.h:241
contains declaration for the exchange map data structure
Utility functions shared by xt_xmap_intersection and xt_xmap_intersection_ext.
static void print_miss_msg(Xt_idxlist dst_idxlist, int missing_pos, MPI_Comm comm, const char *source, int line) __attribute__((noreturn))
static size_t count_pos_ext(size_t num_pos, const int *restrict pos)
static void generate_pos(size_t num_pos_ext, const struct Xt_pos_ext *restrict pos_ext, size_t num_pos, int *restrict pos)
static void generate_pos_ext(size_t num_pos, const int *restrict pos, size_t num_pos_ext, struct Xt_pos_ext *restrict pos_ext)
static const struct Xt_xmap_vtable xmap_intersection_vtable
static Xt_xmap xmap_intersection_ext_copy(Xt_xmap xmap)
static const struct Xt_xmap_iter_vtable xmap_iterator_intersection_ext_vtable
static int xmap_intersection_ext_get_num_destinations(Xt_xmap xmap)
struct Xt_xmap_iter_intersection_ext_ * Xt_xmap_iter_intersection_ext
static Xt_xmap xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions, Xt_pos_ext_copy pe_cpy_in, void *peci_state, Xt_pos_ext_copy pe_cpy_out, void *peco_state)
static int xmap_intersection_ext_get_max_dst_pos(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_reorder(Xt_xmap xmap, enum xt_reorder_type type)
static Xt_xmap_iter xmap_intersection_ext_get_out_iterator(Xt_xmap xmap)
static void sort_transfer_pos_ext(int n, struct exchange_ext *msg)
static struct Xt_pos_ext * exchange_pos_ext_modifications(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], struct exchange_ext dst_ext[num_dst_intersections], int(*restrict src_removals_per_intersection)[2], const int(*restrict dst_removals_per_intersection)[2], int tag_offset, MPI_Comm comm)
static void xmap_intersection_ext_get_destination_ranks(Xt_xmap xmap, int *ranks)
static void remap_dst_intersections(int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist mypart_idxlist, int resCount, struct exchange_ext resSets[resCount], const int(*removals_per_intersection)[2])
Xt_xmap xt_xmap_intersection_ext_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm)
static struct Xt_pos_ext * exchange_transfer_pos_ext(int n_out, const struct exchange_ext *restrict out_msg, int n_in, const struct exchange_ext *restrict in_msg, struct exchange_ext *restrict remote_out_msg, int tag_offset, MPI_Comm comm)
static const struct Xt_pos_ext * xmap_intersection_ext_iterator_get_transfer_pos_ext(Xt_xmap_iter iter)
static void xmap_intersection_ext_get_source_ranks(Xt_xmap xmap, int *ranks)
static void generate_transfer_ext(struct Xt_xmap_intersection_ext_ *xmap, int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist_local, Xt_idxlist dst_idxlist_local, MPI_Comm comm)
static int xmap_intersection_ext_get_num_sources(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions, const int src_displacements[num_repetitions], const int dst_displacements[num_repetitions])
static int const * xmap_intersection_ext_iterator_get_transfer_pos(Xt_xmap_iter iter)
static int update_positions(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state_)
static struct tes_result generate_dir_transfer_pos_ext_src(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, const int(*restrict removals_per_intersection)[2], const struct Xt_pos_ext *pos_updates)
static void xmap_intersection_ext_msg_copy(size_t nmsg, const struct exchange_ext *restrict msg, int *nmsg_copy, struct exchange_ext *restrict msg_copy, int *max_pos_, int num_repetitions, Xt_pos_ext_copy pos_ext_copy, void *pec_state)
static int xmap_intersection_ext_iterator_get_num_transfer_pos(Xt_xmap_iter iter)
static int xmap_intersection_ext_get_max_src_pos(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_update_positions(Xt_xmap xmap, const int *src_positions, const int *dst_positions)
static void reorder_transfer_pos_ext(int n_out, int n_in, struct exchange_ext *out_msg, struct exchange_ext *in_msg, int tag_offset, MPI_Comm comm)
static void remap_intersection(Xt_idxlist mypart_idxlist, Xt_idxlist intersection, size_t num_pos_updates, const struct Xt_pos_ext pos_updates[num_pos_updates], struct exchange_ext *resSet, int single_match_only)
static void xmap_intersection_ext_delete(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_in_iterator(Xt_xmap xmap)
static Xt_xmap_intersection_ext xmie(void *xmap)
static struct Xt_pos_ext * get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, int single_match_only)
static MPI_Comm xmap_intersection_ext_get_communicator(Xt_xmap xmap)
static int xmap_intersection_ext_iterator_get_num_transfer_pos_ext(Xt_xmap_iter iter)
static void cut_pos_ext_from_pos_exts(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *pos_exts)
static void sort_transfer_pos_ext_permutation(int n, struct exchange_ext *msg, struct exchange_ext *permutation_msg)
static int xmap_intersection_ext_iterator_get_rank(Xt_xmap_iter iter)
static struct Xt_stripe * refine_stripes(int *num_stripes_, struct Xt_stripe *restrict intersection_idxstripes, size_t num_pos_updates, const struct Xt_pos_ext *restrict pos_updates)
static struct Xt_pos_ext_overlap Xt_get_pos_ext_overlap(struct Xt_pos_ext a, struct Xt_pos_ext b)
static int pos_ext_copy_verbatim(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
static struct ted_result generate_dir_transfer_pos_ext_dst(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, int(*restrict dst_removals_per_intersection)[2])
static Xt_xmap_iter_intersection_ext xmiei(void *iter)
static void xmap_intersection_ext_iterator_delete(Xt_xmap_iter iter)
struct Xt_xmap_intersection_ext_ * Xt_xmap_intersection_ext
static int pos_ext_find_max_pos(int num_pos_ext, const struct Xt_pos_ext *restrict pos_ext)
static int xmap_intersection_ext_iterator_next(Xt_xmap_iter iter)
int(* Xt_pos_ext_copy)(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static int pos_ext_copy_spread(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)