91 const int * src_positions,
92 const int * dst_positions);
95 const int src_displacements[num_repetitions],
96 const int dst_displacements[num_repetitions]);
149 return xmap_intersection_ext->
comm;
156 return xmap_intersection_ext->
n_out;
163 return xmap_intersection_ext->
n_in;
170 size_t n_out = (size_t)xmap_intersection_ext->
n_out;
172 = xmap_intersection_ext->
msg + xmap_intersection_ext->
n_in;
173 for (
size_t i = 0; i <
n_out; ++i)
174 ranks[i] = out_msg[i].rank;
181 size_t n_in = (size_t)xmap_intersection_ext->
n_in;
182 const struct exchange_ext *restrict in_msg = xmap_intersection_ext->
msg;
183 for (
size_t i = 0; i <
n_in; ++i)
184 ranks[i] = in_msg[i].rank;
199 size_t num_orig_pos,
const int *orig_pos,
207 size_t num_orig_pos,
const int *orig_pos,
210 (void)state; (void)num_orig_pos; (void)orig_pos;
211 size_t size_pos_ext = num_orig_pos_ext *
sizeof (**pos_ext);
213 memcpy(pos_ext_, orig_pos_ext, size_pos_ext);
214 *num_pos_ext = num_orig_pos_ext;
223 int *max_pos_,
int num_repetitions,
226 *nmsg_copy = (int)nmsg;
228 for (
size_t i = 0; i < nmsg; ++i) {
229 msg_copy[i].num_transfer_pos = num_repetitions * msg[i].num_transfer_pos;
230 msg_copy[i].rank = msg[i].rank;
231 msg_copy[i].transfer_pos = NULL;
232 size_t num_transfer_pos_ext;
234 = pos_ext_copy((
size_t)msg[i].num_transfer_pos_ext, &num_transfer_pos_ext,
235 &msg_copy[i].transfer_pos_ext, msg[i].transfer_pos_ext,
236 (
size_t)msg[i].num_transfer_pos, msg[i].transfer_pos,
238 if (new_max_pos > max_pos)
239 max_pos = new_max_pos;
240 msg_copy[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
252 xmap_intersection_ext_new;
253 size_t n_in = (size_t)xmap_intersection_ext->
n_in,
254 n_out = (
size_t)xmap_intersection_ext->
n_out,
255 num_isect = n_in + n_out;
256 xmap_intersection_ext_new
257 =
xmalloc(
sizeof (*xmap_intersection_ext_new)
259 xmap_intersection_ext_new->vtable = xmap_intersection_ext->
vtable;
260 xmap_intersection_ext_new->n_in = (int)n_in;
261 xmap_intersection_ext_new->n_out = (int)n_out;
262 xmap_intersection_ext_new->max_src_pos = xmap_intersection_ext->
max_src_pos;
263 xmap_intersection_ext_new->max_dst_pos = xmap_intersection_ext->
max_dst_pos;
265 &xmap_intersection_ext_new->n_in,
266 xmap_intersection_ext_new->msg,
267 &xmap_intersection_ext_new->max_dst_pos,
268 num_repetitions, pe_cpy_in, peci_state);
270 &xmap_intersection_ext_new->n_out,
271 xmap_intersection_ext_new->msg+n_in,
272 &xmap_intersection_ext_new->max_src_pos,
273 num_repetitions, pe_cpy_out, peco_state);
274 xmap_intersection_ext_new->comm
276 &xmap_intersection_ext_new->tag_offset);
277 return (
Xt_xmap)xmap_intersection_ext_new;
292 for (
size_t i = 0; i < num_msg; ++i) {
293 free(msg[i].transfer_pos);
294 free(msg[i].transfer_pos_ext);
301 size_t num_isect = (size_t)xmap_intersection_ext->
n_in
302 + (
size_t)xmap_intersection_ext->
n_out;
306 free(xmap_intersection_ext);
311 int num_src_intersections,
312 const struct Xt_com_list src_com[num_src_intersections],
313 int num_dst_intersections,
314 const struct Xt_com_list dst_com[num_dst_intersections],
322 src_com[num_src_intersections],
323 int num_dst_intersections,
325 dst_com[num_dst_intersections],
333 = (size_t)num_dst_intersections + (
size_t)num_src_intersections;
343 num_src_intersections, src_com,
344 num_dst_intersections, dst_com,
345 src_idxlist, dst_idxlist, comm);
347 size_t new_num_isect = (size_t)xmap->
n_in + (
size_t)xmap->
n_out;
348 if (new_num_isect != num_isect)
349 xmap =
xrealloc(xmap,
sizeof (*xmap) + (new_num_isect
364 int num_intersections,
365 const struct
Xt_com_list intersections[num_intersections],
368 int (*restrict dst_removals_per_intersection)[2]);
373 int num_src_intersections,
374 const struct Xt_com_list src_com[num_src_intersections],
375 int num_dst_intersections,
376 const struct Xt_com_list dst_com[num_dst_intersections],
378 int (*restrict src_removals_per_intersection)[2],
379 const int (*restrict dst_removals_per_intersection)[2],
385 const struct Xt_com_list dst_com[num_dst_intersections],
389 const int (*removals_per_intersection)[2]);
398 int num_intersections,
399 const struct
Xt_com_list intersections[num_intersections],
402 const int (*restrict removals_per_intersection)[2],
407 int num_src_intersections,
408 const struct Xt_com_list src_com[num_src_intersections],
409 int num_dst_intersections,
410 const struct Xt_com_list dst_com[num_dst_intersections],
421 int (*src_removals_per_intersection)[2] =
422 xmalloc(((
size_t)num_dst_intersections + (
size_t)num_src_intersections)
423 *
sizeof(*src_removals_per_intersection)),
424 (*dst_removals_per_intersection)[2]
425 = src_removals_per_intersection + num_src_intersections;
430 num_dst_intersections, dst_com, dst_idxlist,
431 xmap->
msg, dst_removals_per_intersection);
435 Xt_abort(comm,
"ERROR: ups...this should not have happend...", __FILE__,
437 int first_missing_pos
440 print_miss_msg(dst_idxlist, first_missing_pos, comm, __FILE__, __LINE__);
449 num_src_intersections, src_com, num_dst_intersections, dst_com, xmap->
msg,
450 src_removals_per_intersection,
451 (
const int (*)[2])dst_removals_per_intersection, xmap->
tag_offset, comm);
455 (
const int (*)[2])dst_removals_per_intersection);
457 src_removals_per_intersection =
458 xrealloc(src_removals_per_intersection, (
size_t)num_src_intersections
459 *
sizeof(*src_removals_per_intersection));
463 num_src_intersections, src_com, src_idxlist, xmap->
msg+xmap->
n_in,
464 (
const int (*)[2])src_removals_per_intersection, pos_updates);
467 free(src_removals_per_intersection);
481 a_s = a.start + (aSizeMaskNeg & (a.size + 1)),
482 a_e = a.start + (~aSizeMaskNeg & (a.size - 1)),
484 b_s = b.start + (bSizeMaskNeg & (b.size + 1)),
485 b_e = b.start + (~bSizeMaskNeg & (b.size - 1));
487 if ((b_s > a_e) | (a_s > b_e))
491 int lowSkipA = b_s - a_s;
492 int lowSkipB = -lowSkipA;
493 lowSkipA = (int)((
unsigned)(lowSkipA + abs(lowSkipA))/2U);
494 lowSkipB = (int)((
unsigned)(lowSkipB + abs(lowSkipB))/2U);
495 int overlapLen =
imin(b_e - b_s - lowSkipB + 1,
496 abs(a.size) - lowSkipA);
497 int highSkipA = abs(a.size) - lowSkipA - overlapLen;
500 int aSkipLen = (~aSizeMaskNeg & lowSkipA)
501 | (aSizeMaskNeg & -highSkipA),
502 aTailLen = (aSizeMaskNeg & -lowSkipA)
503 | (~aSizeMaskNeg & highSkipA);
517 const struct Xt_stripe stripes[num_stripes],
519 int single_match_only)
526 idxlist, num_stripes, stripes,
527 num_ext, &pos_ext, single_match_only);
534 int num_intersections,
535 const struct
Xt_com_list intersections[num_intersections],
538 int (*restrict dst_removals_per_intersection)[2])
540 int new_num_intersections = 0;
546 enum { initial_vec_size = 8 };
550 for (
int i = 0; i < num_intersections; ++i) {
552 int num_stripes, num_indices_to_remove = 0;
553 struct Xt_stripe *intersection_idxstripes;
555 &intersection_idxstripes, &num_stripes);
556 int num_isect_pos_exts;
559 mypart_idxlist, num_stripes, intersection_idxstripes,
560 &num_isect_pos_exts, 1);
561 int isect_pos_exts_size_psum = 0;
567 .size_pos_ext = initial_vec_size,
568 .pos_ext =
xrealloc(intersection_idxstripes,
569 sizeof (
struct Xt_pos_ext) * initial_vec_size) };
570 intersection_idxstripes = NULL;
575 for (
size_t j = 0; j < (size_t)num_isect_pos_exts; ++j) {
576 struct Xt_pos_ext isect_pos_ext = isect_pos_exts[j];
578 int isign_mask_isect_pos_ext_size =
isign_mask(isect_pos_ext.
size);
580 += isign_mask_isect_pos_ext_size & (isect_pos_ext.
size + 1);
581 int isect_pos_ext_orig_size = isect_pos_ext.
size;
582 isect_pos_ext.
size = abs(isect_pos_ext.
size);
583 isect_pos_exts_size_psum += isect_pos_ext.
size;
586 int progress = -isect_pos_ext.
size;
587 size_t search_start_pos = 0, insert_pos;
591 .end = isect_pos_ext.
start + isect_pos_ext.
size - 1 };
594 if (insert_pos == SIZE_MAX)
595 goto next_isect_pos_ext;
605 cover.pos_ext[insert_pos].start -= overlap_desc.
skip;
606 cover.pos_ext[insert_pos].size += overlap_desc.
skip;
610 && (cover.pos_ext[insert_pos].start
611 == (cover.pos_ext[insert_pos - 1].start
612 + cover.pos_ext[insert_pos - 1].size)))
614 cover.pos_ext[insert_pos - 1].size
615 += cover.pos_ext[insert_pos].size;
616 memmove(cover.pos_ext + insert_pos, cover.pos_ext + insert_pos + 1,
617 (--cover.num_pos_ext - insert_pos)
618 * sizeof (*cover.pos_ext));
621 progress = (~isign_mask_isect_pos_ext_size
622 & (progress + overlap_desc.
skip))
623 | (isign_mask_isect_pos_ext_size
624 & (isect_pos_ext_orig_size + overlap_desc.
tail));
626 num_indices_to_remove += overlap_desc.
overlap;
628 .start = isect_pos_exts_size_psum + progress,
629 .size = overlap_desc.
overlap }, &transferable);
630 progress += overlap_desc.
overlap;
634 isect_pos_ext.
size = overlap_desc.
tail;
635 search_start_pos = ++insert_pos;
636 }
while ((isect_pos_ext.
size != 0)
637 & (search_start_pos != cover.num_pos_ext));
638 if (isect_pos_ext.
size)
646 if (intersection_size > num_indices_to_remove) {
647 resSets[new_num_intersections].transfer_pos_ext
651 resSets[new_num_intersections].transfer_pos = NULL;
652 resSets[new_num_intersections].num_transfer_pos
653 = intersection_size - num_indices_to_remove;
654 resSets[new_num_intersections].num_transfer_pos_ext
656 resSets[new_num_intersections].rank = intersections[i].rank;
657 ++new_num_intersections;
660 dst_removals_per_intersection[i][0] = num_indices_to_remove;
661 dst_removals_per_intersection[i][1]
662 = ((num_indices_to_remove == intersection_size)
663 | (num_indices_to_remove == 0))?0:(
int)transferable.
num_pos_ext;
664 free(isect_pos_exts);
668 #if defined __PGI && __PGIC__ <= 13
671 r.resCount = new_num_intersections;
676 .resCount = new_num_intersections };
686 size_t i = num_pos_exts_;
689 int db_skip = pos_ext.
start - pos_exts_[i].start;
690 if ((!db_skip) & (pos_ext.
size == pos_exts_[i].size))
693 memmove(pos_exts_ + i, pos_exts_ + i + 1,
694 sizeof (*pos_exts_) * (num_pos_exts_ - i - 1));
697 else if (db_skip + pos_ext.
size == pos_exts_[i].size)
700 pos_exts_[i].size -= pos_ext.
size;
702 else if (db_skip == 0)
705 pos_exts_[i].start = pos_ext.
start + pos_ext.
size;
706 pos_exts_[i].size -= pos_ext.
size;
714 memmove(pos_exts_ + i + 1, pos_exts_ + i,
715 (num_pos_exts_ - i) *
sizeof (*pos_exts_));
720 .size = orig.
size - db_skip - pos_ext.
size };
727 int num_src_intersections,
728 const struct Xt_com_list src_com[num_src_intersections],
729 int num_dst_intersections,
730 const struct Xt_com_list dst_com[num_dst_intersections],
732 int (*restrict src_removals_per_intersection)[2],
733 const int (*restrict dst_removals_per_intersection)[2],
737 MPI_Request * requests
738 =
xmalloc((
size_t)(num_src_intersections + 2 * num_dst_intersections) *
740 MPI_Request *restrict send_header_requests = requests,
741 *restrict recv_requests = requests + num_dst_intersections,
742 *restrict send_data_requests = recv_requests + num_src_intersections;
745 for (
int i = 0; i < num_src_intersections; ++i)
747 src_removals_per_intersection[i], 2, MPI_INT, src_com[i].rank,
749 comm, recv_requests + i), comm);
754 unsigned num_active_dst = 0, num_dst_changes = 0;
755 for (
int i = 0; i < num_dst_intersections; ++i) {
757 CAST_MPI_SEND_BUF(dst_removals_per_intersection[i]),
758 2, MPI_INT, dst_com[i].rank,
760 comm, send_header_requests + i), comm);
762 if (dst_removals_per_intersection[i][1] > 0) {
764 assert(dst_removals_per_intersection[i][1]
765 == dst_ext[num_active_dst].num_transfer_pos_ext
766 && dst_com[i].rank == dst_ext[num_active_dst].rank);
768 dst_ext[num_active_dst].transfer_pos_ext,
769 dst_removals_per_intersection[i][1],
770 MPI_2INT, dst_com[i].rank,
772 comm, send_data_requests + num_dst_changes),
776 num_active_dst += (unsigned)((dst_removals_per_intersection[i][0] == 0)
777 | (dst_removals_per_intersection[i][1] != 0));
781 xt_mpi_call(MPI_Waitall(num_src_intersections + num_dst_intersections,
782 requests, MPI_STATUSES_IGNORE), comm);
784 size_t total_num_pos_ext_to_recv = 0;
786 for (
size_t i = 0; i < (size_t)num_src_intersections; ++i)
787 total_num_pos_ext_to_recv += (
size_t)src_removals_per_intersection[i][1];
790 unsigned num_src_changes = 0;
791 if (total_num_pos_ext_to_recv > 0) {
794 =
xmalloc(total_num_pos_ext_to_recv *
sizeof(*src_updated_pos_ext));
799 for (
int i = 0; i < num_src_intersections; ++i)
800 if (src_removals_per_intersection[i][1] > 0) {
803 src_updated_pos_ext + offset,
804 src_removals_per_intersection[i][1], MPI_2INT,
807 comm, send_data_requests - num_src_changes), comm);
809 offset += (size_t)src_removals_per_intersection[i][1];
812 src_updated_pos_ext = NULL;
815 xt_mpi_call(MPI_Waitall((
int)num_src_changes + (
int)num_dst_changes,
816 send_data_requests - num_src_changes,
817 MPI_STATUSES_IGNORE), comm);
820 return src_updated_pos_ext;
826 size_t num_pos_updates,
827 const struct Xt_pos_ext pos_updates[num_pos_updates],
829 int single_match_only);
833 int num_dst_intersections,
834 const struct Xt_com_list intersections[num_dst_intersections],
838 const int (*removals_per_intersection)[2])
841 for (
size_t i = 0; i < (size_t)num_dst_intersections; ++i)
843 int intersection_size
846 int num_indices_to_remove = removals_per_intersection[i][0];
848 if (num_indices_to_remove != intersection_size) {}
else
852 struct Xt_pos_ext *pos_updates = resSets[resIdx].transfer_pos_ext;
854 (
size_t)removals_per_intersection[i][1],
855 pos_updates, resSets + resIdx, 1);
859 assert(resIdx == (
size_t)resCount);
867 for (
size_t i = 0; i < (size_t)num_pos_ext; ++i) {
868 int start = pos_ext[i].start,
869 size = pos_ext[i].size,
871 if (max > max_pos) max_pos = max;
879 int num_intersections,
880 const struct
Xt_com_list intersections[num_intersections],
883 const int (*restrict removals_per_intersection)[2],
887 int new_num_intersections = 0;
889 size_t intersection_pos_ext = 0;
892 for (
int i = 0; i < num_intersections; ++i) {
894 int intersection_size
897 int num_indices_to_remove = removals_per_intersection[i][0];
900 if (num_indices_to_remove != intersection_size) {
903 (
size_t)removals_per_intersection[i][1],
904 pos_updates + intersection_pos_ext,
905 resSets + new_num_intersections, 0);
908 resSets[new_num_intersections].num_transfer_pos_ext,
909 resSets[new_num_intersections].transfer_pos_ext);
910 if (max > max_pos) max_pos = max;
912 resSets[new_num_intersections].transfer_pos = NULL;
913 resSets[new_num_intersections].num_transfer_pos
914 = intersection_size - num_indices_to_remove;
915 resSets[new_num_intersections].rank = intersections[i].rank;
916 new_num_intersections++;
917 intersection_pos_ext += (size_t)removals_per_intersection[i][1];
929 struct Xt_stripe *restrict intersection_idxstripes,
930 size_t num_pos_updates,
931 const struct Xt_pos_ext *restrict pos_updates)
934 size_t num_refined_intersection_idxstripes = 0,
935 size_refined_intersection_idxstripes = num_pos_updates;
936 struct Xt_stripe *restrict refined_intersection_idxstripes
937 =
xmalloc(size_refined_intersection_idxstripes
938 *
sizeof (*refined_intersection_idxstripes));
940 int nstrides_psum = 0;
941 for (
size_t i_pos_ext = 0; i_pos_ext < num_pos_updates; ++i_pos_ext)
943 int pos = pos_updates[i_pos_ext].start;
944 int size = pos_updates[i_pos_ext].size;
945 while (nstrides_psum + intersection_idxstripes[i_stripe].
nstrides <= pos)
947 nstrides_psum += intersection_idxstripes[i_stripe].nstrides;
951 int instripe_pos = pos - nstrides_psum;
953 size_refined_intersection_idxstripes,
954 num_refined_intersection_idxstripes + 1);
955 struct Xt_stripe cur_stripe = intersection_idxstripes[i_stripe];
956 int cur_stripe_nstrides = cur_stripe.
nstrides;
957 int overlap =
imin(cur_stripe_nstrides - instripe_pos, size);
962 refined_intersection_idxstripes[num_refined_intersection_idxstripes]
964 ++num_refined_intersection_idxstripes;
965 i_stripe += (instripe_pos + overlap == cur_stripe_nstrides);
966 nstrides_psum += (instripe_pos + overlap == cur_stripe_nstrides)
967 ? cur_stripe_nstrides : 0;
972 free(intersection_idxstripes);
973 *num_stripes_ = (int)num_refined_intersection_idxstripes;
974 return refined_intersection_idxstripes;
987 size_t num_pos_updates,
988 const struct Xt_pos_ext pos_updates[num_pos_updates],
990 int single_match_only)
992 struct Xt_stripe *intersection_idxstripes;
995 &intersection_idxstripes,
998 intersection_idxstripes
1000 num_pos_updates, pos_updates);
1008 mypart_idxlist, num_stripes, intersection_idxstripes,
1011 assert(retval == 0);
1012 free(intersection_idxstripes);
1016 int n_out,
const struct exchange_ext *restrict out_msg,
1021 MPI_Request * requests
1022 =
xmalloc((
size_t)(n_in + 2 * n_out) *
sizeof(*requests));
1023 MPI_Request *send_header_requests = requests,
1024 *recv_requests = requests + n_out,
1025 *send_data_requests = recv_requests + n_in;
1029 for (
int i = 0; i < n_in; ++i)
1031 &(remote_out_msg[i].num_transfer_pos_ext), 1, MPI_INT,
1034 comm, recv_requests + i), comm);
1037 for (
int i = 0; i < n_out; ++i) {
1039 CAST_MPI_SEND_BUF(&(out_msg[i].num_transfer_pos_ext)),
1040 1, MPI_INT, out_msg[i].rank,
1042 comm, send_header_requests + i), comm);
1045 CAST_MPI_SEND_BUF(out_msg[i].transfer_pos_ext),
1046 out_msg[i].num_transfer_pos_ext,
1047 MPI_2INT, out_msg[i].rank,
1049 comm, send_data_requests + i),
1055 MPI_Waitall(n_out + n_in, send_header_requests, MPI_STATUSES_IGNORE), comm);
1057 size_t total_num_pos_ext_to_recv = 0;
1059 for (
size_t i = 0; i < (size_t)n_in; ++i)
1060 total_num_pos_ext_to_recv +=
1061 (
size_t)(remote_out_msg[i].num_transfer_pos_ext);
1064 if (total_num_pos_ext_to_recv > 0) {
1066 transfer_pos_ext_buffer
1067 =
xmalloc(total_num_pos_ext_to_recv *
sizeof(*transfer_pos_ext_buffer));
1070 struct Xt_pos_ext *curr_transfer_pos_ext = transfer_pos_ext_buffer;
1071 for (
int i = 0; i < n_in; ++i) {
1073 curr_transfer_pos_ext,
1074 remote_out_msg[i].num_transfer_pos_ext, MPI_2INT,
1077 comm, recv_requests + i), comm);
1079 remote_out_msg[i].transfer_pos_ext = curr_transfer_pos_ext;
1080 curr_transfer_pos_ext += remote_out_msg[i].num_transfer_pos_ext;
1083 transfer_pos_ext_buffer = NULL;
1087 MPI_Waitall(n_in + n_out, recv_requests, MPI_STATUSES_IGNORE), comm);
1090 return transfer_pos_ext_buffer;
1096 int buffer_size = 0;
1097 for (
int i = 0; i < n; ++i)
1098 if (msg[i].transfer_pos == NULL && msg[i].num_transfer_pos > buffer_size)
1101 int *transfer_pos_buffer
1103 ?
xmalloc((
size_t)buffer_size *
sizeof(*transfer_pos_buffer))
1106 for (
int i = 0; i < n; ++i) {
1109 int *restrict transfer_pos;
1110 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1111 if (msg[i].transfer_pos != NULL) {
1114 transfer_pos = transfer_pos_buffer;
1116 (
size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1117 num_transfer_pos, transfer_pos);
1124 size_t num_transfer_pos_ext =
count_pos_ext(num_transfer_pos, transfer_pos);
1126 if (num_transfer_pos_ext != (
size_t)(msg[i].num_transfer_pos_ext)) {
1131 num_transfer_pos_ext *
sizeof(*transfer_pos_ext)));
1136 num_transfer_pos, transfer_pos, num_transfer_pos_ext, transfer_pos_ext);
1139 if (buffer_size > 0) free(transfer_pos_buffer);
1147 size_t buffer_size = 0;
1148 for (
int i = 0; i < n; ++i) {
1149 assert(msg[i].transfer_pos == NULL
1150 && permutation_msg[i].transfer_pos == NULL);
1151 size_t curr_buffer_size
1152 = (size_t)(msg[i].num_transfer_pos)
1153 + (size_t)(permutation_msg[i].num_transfer_pos);
1154 if (curr_buffer_size > buffer_size) buffer_size = curr_buffer_size;
1157 int *transfer_pos_buffer
1159 ?
xmalloc(buffer_size *
sizeof(*transfer_pos_buffer))
1162 for (
int i = 0; i < n; ++i) {
1165 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1167 int *restrict transfer_pos = transfer_pos_buffer;
1169 (
size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1170 num_transfer_pos, transfer_pos);
1173 int *permutation = transfer_pos_buffer + num_transfer_pos;
1175 (
size_t)(permutation_msg[i].num_transfer_pos_ext),
1176 permutation_msg[i].transfer_pos_ext, num_transfer_pos, permutation);
1182 size_t num_transfer_pos_ext =
count_pos_ext(num_transfer_pos, permutation);
1184 if (num_transfer_pos_ext !=
1185 (
size_t)(permutation_msg[i].num_transfer_pos_ext)) {
1189 =
xrealloc(permutation_msg[i].transfer_pos_ext,
1190 num_transfer_pos_ext *
sizeof(*transfer_pos_ext));
1195 num_transfer_pos, permutation, num_transfer_pos_ext, transfer_pos_ext);
1198 if (buffer_size > 0) free(transfer_pos_buffer);
1206 xt_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1209 xmalloc((
size_t)n_in *
sizeof(*remote_out_msg));
1211 for (
int i = 0; i < n_in; ++i) {
1212 remote_out_msg[i].rank = in_msg[i].
rank;
1214 remote_out_msg[i].transfer_pos = NULL;
1220 n_out, out_msg, n_in, in_msg, remote_out_msg, tag_offset, comm);
1229 free(transfer_pos_ext_buffer);
1230 free(remote_out_msg);
1239 int n_out = xmap_intersection_ext_new->
n_out;
1240 int n_in = xmap_intersection_ext_new->
n_in;
1242 *out_msg = in_msg + n_in;
1244 int tag_offset = xmap_intersection_ext_new->
tag_offset;
1246 switch ((
int)
type) {
1256 Xt_abort(comm,
"ERROR(xmap_intersection_ext_reorder):invalid reorder "
1257 "type", __FILE__, __LINE__);
1260 return (
Xt_xmap)xmap_intersection_ext_new;
1271 size_t *num_pos_ext,
1274 size_t num_orig_pos,
const int *orig_pos,
1277 (void)num_orig_pos_ext;
1290 for (
size_t j = 0; j < num_orig_pos; ++j) {
1300 = *pos_ext =
xmalloc(num_pos_ext_ *
sizeof (*pos_ext_));
1307 const int *src_positions,
1308 const int *dst_positions) {
1311 size_t max_num_pos = 0;
1312 size_t n = (size_t)xmie_orig->
n_in + (
size_t)xmie_orig->
n_out;
1314 for (
size_t i = 0; i < n; ++i)
1315 if ((
size_t)msg[i].num_transfer_pos > max_num_pos)
1319 ?
xmalloc((
size_t)max_num_pos *
sizeof(*pos_buffer))
1339 size_t *num_pos_ext,
1342 size_t num_orig_pos,
const int *orig_pos,
1345 (void)num_orig_pos; (void)orig_pos;
1350 size_t size_pos_ext = new_num_pos_ext *
sizeof (**pos_ext);
1353 for (
int i = 0; i < num_repetitions; ++i) {
1355 pos_ext_ + (size_t)i * num_orig_pos_ext;
1356 const int curr_displacement = displacements[i];
1357 for (
size_t j = 0; j < num_orig_pos_ext; ++j) {
1358 int start = orig_pos_ext[j].
start + curr_displacement,
1366 *num_pos_ext = new_num_pos_ext;
1373 const int src_displacements[num_repetitions],
1374 const int dst_displacements[num_repetitions]) {
1380 .num_repetitions = num_repetitions,
1381 .displacements = src_displacements },
1385 .displacements = dst_displacements });
1410 .get_num_transfer_pos_ext
1428 if (xmap_intersection_ext->
n_in == 0)
1434 iter->
msg = xmap_intersection_ext->
msg;
1444 if (xmap_intersection_ext->
n_out == 0)
1450 iter->
msg = xmap_intersection_ext->
msg + xmap_intersection_ext->
n_in;
1466 if (iter_intersection == NULL || iter_intersection->
msgs_left == 0)
1469 iter_intersection->
msg++;
1477 assert(iter != NULL);
1484 assert(iter != NULL);
1486 if ((!msg->num_transfer_pos) | (msg->transfer_pos != NULL)) { }
else {
1491 (
size_t)msg->num_transfer_pos_ext, msg->transfer_pos_ext,
1494 return msg->transfer_pos;
1499 assert(iter != NULL);
1505 assert(iter != NULL);
1511 assert(iter != NULL);
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
integer, parameter, public sp
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
void xt_quicksort_int(int a[], size_t n)
void xt_quicksort_int_permutation(int a[], size_t n, int permutation[])
struct Xt_pos_ext * pos_ext
struct exchange_ext msg[]
const struct Xt_xmap_vtable * vtable
struct exchange_ext * msg
const struct Xt_xmap_iter_vtable * vtable
int(* next)(Xt_xmap_iter iter)
MPI_Comm(* get_communicator)(Xt_xmap)
struct Xt_pos_ext * transfer_pos_ext
const int *restrict displacements
const int * displacements
struct Xt_pos_ext_vec cover
static int isign_mask(int x)
static int imin(int a, int b)
void xt_cover_range_append(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_ext range)
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
bool xt_idxlist_pos_ext_is_full_cover(Xt_idxlist idxlist, struct Xt_pos_ext_vec cover)
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, bool forward, size_t search_start_pos)
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
int xt_idxlist_get_num_indices(Xt_idxlist idxlist)
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
int xt_idxlist_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only)
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
#define xt_mpi_call(call, comm)
@ xt_mpi_tag_xmap_intersection_data_exchange
@ xt_mpi_tag_xmap_intersection_header_exchange
exchange map declarations
@ XT_REORDER_RECV_UP
optimise data access on receiver side
@ XT_REORDER_NONE
no reordering
@ XT_REORDER_SEND_UP
optimise data access on sender side
contains declaration for the exchange map data structure
Utility functions shared by xt_xmap_intersection and xt_xmap_intersection_ext.
static void print_miss_msg(Xt_idxlist dst_idxlist, int missing_pos, MPI_Comm comm, const char *source, int line) __attribute__((noreturn))
static size_t count_pos_ext(size_t num_pos, const int *restrict pos)
static void generate_pos(size_t num_pos_ext, const struct Xt_pos_ext *restrict pos_ext, size_t num_pos, int *restrict pos)
static void generate_pos_ext(size_t num_pos, const int *restrict pos, size_t num_pos_ext, struct Xt_pos_ext *restrict pos_ext)
static const struct Xt_xmap_vtable xmap_intersection_vtable
static Xt_xmap xmap_intersection_ext_copy(Xt_xmap xmap)
static const struct Xt_xmap_iter_vtable xmap_iterator_intersection_ext_vtable
static int xmap_intersection_ext_get_num_destinations(Xt_xmap xmap)
struct Xt_xmap_iter_intersection_ext_ * Xt_xmap_iter_intersection_ext
static Xt_xmap xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions, Xt_pos_ext_copy pe_cpy_in, void *peci_state, Xt_pos_ext_copy pe_cpy_out, void *peco_state)
static int xmap_intersection_ext_get_max_dst_pos(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_reorder(Xt_xmap xmap, enum xt_reorder_type type)
static Xt_xmap_iter xmap_intersection_ext_get_out_iterator(Xt_xmap xmap)
static void sort_transfer_pos_ext(int n, struct exchange_ext *msg)
static struct Xt_pos_ext * exchange_pos_ext_modifications(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], struct exchange_ext dst_ext[num_dst_intersections], int(*restrict src_removals_per_intersection)[2], const int(*restrict dst_removals_per_intersection)[2], int tag_offset, MPI_Comm comm)
static void xmap_intersection_ext_get_destination_ranks(Xt_xmap xmap, int *ranks)
static void remap_dst_intersections(int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist mypart_idxlist, int resCount, struct exchange_ext resSets[resCount], const int(*removals_per_intersection)[2])
Xt_xmap xt_xmap_intersection_ext_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm)
static struct Xt_pos_ext * exchange_transfer_pos_ext(int n_out, const struct exchange_ext *restrict out_msg, int n_in, const struct exchange_ext *restrict in_msg, struct exchange_ext *restrict remote_out_msg, int tag_offset, MPI_Comm comm)
static const struct Xt_pos_ext * xmap_intersection_ext_iterator_get_transfer_pos_ext(Xt_xmap_iter iter)
static void xmap_intersection_ext_get_source_ranks(Xt_xmap xmap, int *ranks)
static void generate_transfer_ext(struct Xt_xmap_intersection_ext_ *xmap, int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist_local, Xt_idxlist dst_idxlist_local, MPI_Comm comm)
static int xmap_intersection_ext_get_num_sources(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions, const int src_displacements[num_repetitions], const int dst_displacements[num_repetitions])
static int const * xmap_intersection_ext_iterator_get_transfer_pos(Xt_xmap_iter iter)
static int update_positions(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state_)
static struct tes_result generate_dir_transfer_pos_ext_src(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, const int(*restrict removals_per_intersection)[2], const struct Xt_pos_ext *pos_updates)
static void xmap_intersection_ext_msg_copy(size_t nmsg, const struct exchange_ext *restrict msg, int *nmsg_copy, struct exchange_ext *restrict msg_copy, int *max_pos_, int num_repetitions, Xt_pos_ext_copy pos_ext_copy, void *pec_state)
static int xmap_intersection_ext_iterator_get_num_transfer_pos(Xt_xmap_iter iter)
static int xmap_intersection_ext_get_max_src_pos(Xt_xmap xmap)
static Xt_xmap xmap_intersection_ext_update_positions(Xt_xmap xmap, const int *src_positions, const int *dst_positions)
static void reorder_transfer_pos_ext(int n_out, int n_in, struct exchange_ext *out_msg, struct exchange_ext *in_msg, int tag_offset, MPI_Comm comm)
static void remap_intersection(Xt_idxlist mypart_idxlist, Xt_idxlist intersection, size_t num_pos_updates, const struct Xt_pos_ext pos_updates[num_pos_updates], struct exchange_ext *resSet, int single_match_only)
static void xmap_intersection_ext_delete(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_in_iterator(Xt_xmap xmap)
static Xt_xmap_intersection_ext xmie(void *xmap)
static struct Xt_pos_ext * get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, int single_match_only)
static MPI_Comm xmap_intersection_ext_get_communicator(Xt_xmap xmap)
static int xmap_intersection_ext_iterator_get_num_transfer_pos_ext(Xt_xmap_iter iter)
static void cut_pos_ext_from_pos_exts(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *pos_exts)
static void sort_transfer_pos_ext_permutation(int n, struct exchange_ext *msg, struct exchange_ext *permutation_msg)
static int xmap_intersection_ext_iterator_get_rank(Xt_xmap_iter iter)
static struct Xt_stripe * refine_stripes(int *num_stripes_, struct Xt_stripe *restrict intersection_idxstripes, size_t num_pos_updates, const struct Xt_pos_ext *restrict pos_updates)
static struct Xt_pos_ext_overlap Xt_get_pos_ext_overlap(struct Xt_pos_ext a, struct Xt_pos_ext b)
static int pos_ext_copy_verbatim(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
static struct ted_result generate_dir_transfer_pos_ext_dst(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, int(*restrict dst_removals_per_intersection)[2])
static Xt_xmap_iter_intersection_ext xmiei(void *iter)
static void xmap_intersection_ext_iterator_delete(Xt_xmap_iter iter)
struct Xt_xmap_intersection_ext_ * Xt_xmap_intersection_ext
static int pos_ext_find_max_pos(int num_pos_ext, const struct Xt_pos_ext *restrict pos_ext)
static int xmap_intersection_ext_iterator_next(Xt_xmap_iter iter)
int(* Xt_pos_ext_copy)(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static int pos_ext_copy_spread(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)