76 #define MIN(a,b) (((a)<(b))?(a):(b))
77 #define MAX(a,b) (((a)>(b))?(a):(b))
116 int single_match_only);
124 int * position,
int offset);
143 .get_positions_of_indices = NULL,
146 .get_positions_of_indices_off = NULL,
149 .get_bounding_box = NULL,
160 MPI_Aint base_address, start_address, nstrides_address, stride_address;
162 MPI_Get_address(&stripe, &base_address);
163 MPI_Get_address(&stripe.
start, &start_address);
164 MPI_Get_address(&stripe.
stride, &stride_address);
165 MPI_Get_address(&stripe.
nstrides, &nstrides_address);
167 enum { num_stripe_dt_elems = 3 };
168 int block_lengths[num_stripe_dt_elems] = {1,1,1};
169 MPI_Aint displacements[num_stripe_dt_elems]
170 = {start_address - base_address,
171 stride_address - base_address,
172 nstrides_address - base_address };
176 xt_mpi_call(MPI_Type_create_struct(num_stripe_dt_elems,
177 block_lengths, displacements, types,
178 &stripe_dt_unaligned), Xt_default_comm);
179 xt_mpi_call(MPI_Type_create_resized(stripe_dt_unaligned, 0,
180 (MPI_Aint)
sizeof(stripe),
182 xt_mpi_call(MPI_Type_free(&stripe_dt_unaligned), Xt_default_comm);
227 return ((a->range.min > b->range.min) -
228 (a->range.min < b->range.min));
240 stripes_sort[0].range = stripe_range;
241 stripes_sort[0].position = 0;
245 size_t num_stripes = (size_t)idxstripes->
num_stripes;
246 long long num_indices = (
long long)stripes[0].nstrides;
247 int sign_err = stripes[0].nstrides;
248 int have_zero_stride = stripes[0].stride == 0;
249 for (
size_t i = 1; i < num_stripes; ++i) {
251 stripes_sort[i].range = stripe_range;
252 stripes_sort[i].position = (int)i;
255 num_indices += (
long long)stripes[i].nstrides;
256 sign_err |= stripes[i].nstrides;
257 have_zero_stride |= stripes[i].stride == 0;
261 static const char template[] =
"ERROR: %s called with invalid stripes";
262 size_t buf_size =
sizeof (
template) - 2 + strlen(caller);
264 snprintf(msg, buf_size,
template, caller);
267 if (num_indices > 0) {
268 assert(num_indices <= INT_MAX);
270 stripes_sort[stripes_sort[0].position].inv_position = 0;
271 int stripes_do_overlap = 0;
272 for (
size_t i = 1; i < num_stripes; ++i) {
274 |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
275 stripes_sort[stripes_sort[i].position].inv_position = (int)i;
301 if (num_stripes > 0) {
308 body_size =
sizeof (
struct Xt_stripe) * (size_t)num_stripes;
312 = (
struct Xt_stripe *)(
void *)((
unsigned char *)idxstripes
314 idxstripes->stripes = stripes_assign;
315 idxstripes->num_stripes
335 if (num_stripes > 0) {
343 body_size =
sizeof (
struct Xt_stripe) * (size_t)num_stripes;
346 = (
struct Xt_stripe *)(
void *)((
unsigned char *)idxstripes + header_size);
348 memmove(stripes_moved, idxstripes,
sizeof (*stripes) * (
size_t)num_stripes);
349 idxstripes->stripes = stripes_moved;
350 idxstripes->num_stripes = num_stripes;
364 if (num_stripes > 0) {
385 if (data == NULL)
return;
398 int size_header, size_stripes = 0;
400 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
403 &size_stripes), comm);
405 return (
size_t)size_header + (size_t)size_stripes;
419 buffer_size, position, comm), comm);
423 buffer, buffer_size, position, comm), comm);
434 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
435 &num_stripes, 1, MPI_INT, comm), comm);
445 body_size =
sizeof (
struct Xt_stripe) * (size_t)num_stripes;
447 idxstripes->num_stripes = num_stripes;
450 = (
struct Xt_stripe *)(
void *)((
unsigned char *)idxstripes
452 idxstripes->stripes = stripes_assign;
453 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, stripes_assign,
467 INSTR_DEF(instr,
"compute_intersection_fallback")
479 idxvec_from_stripes_dst);
501 b = (
Xt_int)(prev_a - q * b);
504 s = (
Xt_int)(prev_u - q * s);
507 t = (
Xt_int)(prev_v - q * t);
521 INSTR_DEF(instr,
"get_stripe_intersection")
524 struct Xt_bounded_stripe {
528 struct Xt_bounded_stripe bsa, bsb, bsi;
530 Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
531 stride_zero_mask_b = (stripe_b.stride != 0) - 1;
534 bsa.min = (
Xt_int)(stripe_a.start
535 + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
536 bsa.max = (
Xt_int)(stripe_a.start
537 + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
539 bsa.representative = stripe_a.start;
542 bsb.min = (
Xt_int)(stripe_b.start
543 + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
544 bsb.max = (
Xt_int)(stripe_b.start
545 + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
547 bsb.representative = stripe_b.start;
549 bsa.stride = (
Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
550 | (stride_zero_mask_a & 1));
551 bsb.stride = (
Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
552 | (stride_zero_mask_b & 1));
556 Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
560 = (
Xt_int)(bsb.representative
561 + (start_diff/abs_stride_b
562 + (start_diff%abs_stride_b > abs_stride_b/2))
564 start_diff = (
Xt_long)bsb.representative - bsa.representative;
567 Xt_idiv start_diff_abs_stride_b_div
570 = (
Xt_int)(bsb.representative
571 + (start_diff_abs_stride_b_div.
quot
572 + start_diff_abs_stride_b_div.
rem > abs_stride_b/2)
574 start_diff =
xiisub(bsb.representative, bsa.representative);
576 Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
578 bsi.min =
MAX(bsa.min, bsb.min);
579 bsi.max =
MIN(bsa.max, bsb.max);
587 if (bsb.representative != bsa.representative)
606 = (
Xt_int)(bsa.representative
607 + ((bsb.representative - bsa.representative) * eg.
u
608 * abs_stride_a / eg.
gcd));
610 Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
611 r_diff = (
Xt_int)(bsi.min - some_rep),
612 steps = (
Xt_int)(r_diff / abs_bsi_stride);
613 steps = (
Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
614 min_rep = (
Xt_int)(some_rep + steps * abs_bsi_stride);
615 bsi.representative = (
Xt_int)min_rep;
617 nstrides = (int)((bsi.max - min_rep)/temp_stride +
llsign(temp_stride));
619 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
620 & (bsi.stride == temp_stride || abs(
nstrides) == 1));
622 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
623 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
626 intersection.start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
629 #define ABS(x) (((x) < 0) ? -(x) : (x))
631 bsi.stride = (
Xt_int)temp_stride;
636 bool min_rep_in_range;
639 if (bsb.representative != bsa.representative) {
660 min_rep_in_range =
true;
661 some_rep = bsa.representative;
664 Xt_long abs_bsi_stride = ABS(temp_stride),
665 r_diff = bsi.min - some_rep,
666 steps = r_diff / abs_bsi_stride;
668 steps = steps + (steps * abs_bsi_stride < r_diff);
671 min_rep += steps * abs_bsi_stride;
673 bsi.representative = (
Xt_int)min_rep;
675 int min_rep_mask = ~((int)min_rep_in_range - 1);
676 nstrides = (int)((((bsi.max - min_rep)/temp_stride)+
xlsign(temp_stride))
678 | (~min_rep_mask & (bsb.representative == bsa.representative));
680 = ((start_diff%eg.
gcd == 0) & (bsi_stride_in_range || abs(
nstrides) == 1));
682 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
683 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
686 intersection.start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
690 bsi.stride = (
Xt_int)temp_stride.
lo;
697 bool min_rep_in_range;
700 if (bsb.representative != bsa.representative) {
703 =
xiimul(bsb.representative - bsa.representative, eg.
u);
720 some_rep =
xliadd(temp_3.
quot, bsa.representative);
722 min_rep_in_range =
true;
723 some_rep =
xi2l(bsa.representative);
727 r_diff =
xilsub(bsi.min, some_rep);
731 && steps.
rem.
lo != 0));
733 min_rep =
xlladd(some_rep,
739 bsi.representative = (
Xt_int)min_rep.lo;
741 if (min_rep_in_range)
744 nstrides = bsb.representative == bsa.representative;
746 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
747 & (bsi_stride_in_range || abs(
nstrides) == 1));
749 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
754 intersection.start = ((bsa.stride >= 0) ? (
Xt_int)min_rep.lo : max_rep);
759 intersection.nstrides
761 & ~((int)stride_zero_mask_a & (
int)stride_zero_mask_b))
762 | (stripe_b.nstrides & (int)stride_zero_mask_a & (
int)stride_zero_mask_b);
772 INSTR_DEF(instr,
"idxstripes_compute_intersection")
775 struct Xt_stripe *restrict inter_stripes = NULL;
776 size_t num_inter_stripes = 0;
777 size_t inter_stripes_array_size = 0;
781 *restrict dst_stripes_sort = idxstripes_dst->
stripes_sort;
783 *restrict stripes_dst = idxstripes_dst->
stripes;
785 size_t i_src = 0, i_dst = 0;
786 size_t num_stripes_src = (size_t)idxstripes_src->
num_stripes,
787 num_stripes_dst = (
size_t)idxstripes_dst->
num_stripes;
789 while ((i_src < num_stripes_src) &
790 (i_dst < num_stripes_dst)) {
792 while (i_src < num_stripes_src &&
793 src_stripes_sort[i_src].range.max
794 < dst_stripes_sort[i_dst].range.min) ++i_src;
796 if ( i_src >= num_stripes_src )
break;
798 while (i_dst < num_stripes_dst &&
799 dst_stripes_sort[i_dst].range.max
800 < src_stripes_sort[i_src].range.min) ++i_dst;
802 if ( i_dst >= num_stripes_dst )
break;
804 if ((src_stripes_sort[i_src].range.min
805 <= dst_stripes_sort[i_dst].range.max)
806 & (src_stripes_sort[i_src].range.max
807 >= dst_stripes_sort[i_dst].range.min)) {
809 num_inter_stripes+1);
812 inter_stripes[num_inter_stripes] = intersection_stripe =
814 stripes_dst[dst_stripes_sort[i_dst].position]);
815 num_inter_stripes += intersection_stripe.
nstrides > 0;
818 if (dst_stripes_sort[i_dst].range.max
819 < src_stripes_sort[i_src].range.max)
825 if (num_inter_stripes) {
827 struct Xt_stripe prev_stripe = inter_stripes[0];
828 if (prev_stripe.
stride < 0) {
834 inter_stripes[0] = prev_stripe;
836 for (
size_t i = 1; i < num_inter_stripes; ++i) {
837 struct Xt_stripe stripe = inter_stripes[i];
845 == (prev_stripe.
start
849 inter_stripes[j].nstrides = prev_stripe.
nstrides;
853 inter_stripes[++j] = stripe;
854 prev_stripe = stripe;
857 num_inter_stripes = j + 1;
875 if ((idxstripes_src->
flags | idxstripes_dst->flags)
892 INSTR_DEF(instr,
"idxstripes_get_indices")
898 size_t num_stripes = (size_t)idxstripes->
num_stripes;
899 for (
size_t i = 0; i < num_stripes; ++i)
900 for (
Xt_int j = 0; j < stripes[i].nstrides; ++j)
929 INSTR_DEF(instr,
"idxstripes_get_index_stripes")
935 size_t num_temp_stripes, num_stripes_ = (size_t)idxstripes->
num_stripes;
937 num_temp_stripes = num_stripes_;
938 for (
size_t i = 0; i < num_stripes_; ++i)
939 if (stripes_[i].
stride != 1)
940 num_temp_stripes += (size_t)stripes_[i].
nstrides - 1U;
941 struct Xt_stripe *restrict temp_stripes = *stripes
942 =
xrealloc(*stripes, num_temp_stripes *
sizeof(*temp_stripes));
943 *num_stripes = (int)num_temp_stripes;
944 num_temp_stripes = 0;
945 for (
size_t i = 0; i < num_stripes_; ++i)
946 if (stripes_[i].
stride == 1)
947 temp_stripes[num_temp_stripes++] = stripes_[i];
949 for (
size_t j = 0; j < (size_t)stripes_[i].
nstrides; ++j) {
950 temp_stripes[num_temp_stripes].start
952 temp_stripes[num_temp_stripes].nstrides = 1;
953 temp_stripes[num_temp_stripes].stride = 1;
964 INSTR_DEF(instr,
"idxstripes_get_index_at_position")
969 if (position >= 0 && position < idxlist->num_indices) {
972 size_t num_stripes = (size_t)idxstripes->
num_stripes;
974 while (i < num_stripes && position >= stripes[i].
nstrides) {
975 position-= stripes[i].nstrides;
991 int num_pos,
Xt_int *index,
994 INSTR_DEF(instr,
"idxstripes_get_indices_at_positions")
1005 int stripe_start_pos = 0;
1006 int undef_count = 0;
1008 for (
int ipos = 0; ipos < num_pos; ipos++) {
1010 int seek_pos = positions[ipos];
1012 if (seek_pos >= 0 && seek_pos < max_pos) {
1013 while (seek_pos < stripe_start_pos) {
1017 die(
"idxstripes_get_indices_at_positions: internal error:"
1018 " crossed 0-boundary");
1020 stripe_start_pos -= (int)stripes[istripe].
nstrides;
1023 while (seek_pos > stripe_start_pos + stripes[istripe].
nstrides - 1) {
1024 stripe_start_pos += (int)stripes[istripe].
nstrides;
1028 die(
"idxstripes_get_indices_at_positions: internal error:"
1029 " crossed upper boundary");
1033 sub_pos = seek_pos - stripe_start_pos;
1037 index[ipos] = undef_idx;
1056 int * position,
int offset) {
1058 INSTR_DEF(instr,
"idxstripes_get_position_of_index_off")
1065 size_t num_stripes = (size_t)idxstripes->
num_stripes, i = 0;
1066 Xt_int position_offset = 0;
1068 while (i < num_stripes && position_offset + stripes[i].
nstrides <= offset)
1069 position_offset = (
Xt_int)(position_offset + stripes[i++].
nstrides);
1071 for (; i < num_stripes;
1072 position_offset = (
Xt_int)(position_offset + stripes[i++].
nstrides)) {
1074 if ((stripes[i].
stride > 0 && index < stripes[i].
start)
1075 || (stripes[i].stride < 0 && index > stripes[i].start))
1080 if (rel_start % stripes[i].
stride)
continue;
1085 *position = (int)(rel_start/stripes[i].
stride + position_offset);
1101 size_t num_pos_exts_ = result->num_pos_ext,
1102 size_pos_exts_ = result->size_pos_ext;
1103 struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1106 if (num_pos_exts_ + 1 == size_pos_exts_)
1108 size_pos_exts_ += 16;
1110 result->pos_ext = pos_exts_ = (
struct Xt_pos_ext *)
1111 xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) *
sizeof (*pos_exts_)) + 1;
1112 result->size_pos_ext = size_pos_exts_;
1114 pos_exts_[num_pos_exts_] = pos_ext;
1115 result->num_pos_ext = num_pos_exts_ + 1;
1119 pos_exts_[num_pos_exts_ - 1].
size
1120 =
isign(pos_ext.
start - pos_exts_[num_pos_exts_ - 1].start)
1121 * (abs(pos_exts_[num_pos_exts_ - 1].
size) + abs(pos_ext.
size));
1136 const struct Xt_stripe *restrict stripes;
1137 db->stripes = stripes = idxstripes->
stripes;
1138 size_t num_db_stripes = (size_t)idxstripes->
num_stripes;
1140 int *restrict db_stripes_nstrides_psum
1141 =
xmalloc((num_db_stripes + 1)
1142 *
sizeof (db->stripes_nstrides_psum[0]));
1143 db_stripes_nstrides_psum[0] = 0;
1144 for (
size_t j = 0; j < num_db_stripes; ++j) {
1145 db_stripes_nstrides_psum[j + 1]
1146 = db_stripes_nstrides_psum[j] + stripes[j].nstrides;
1149 db->num_stripes = num_db_stripes;
1150 db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1156 free(db->stripes_nstrides_psum);
1165 static inline size_t
1170 size_t left = 0, right = n - 1;
1171 if ((a && n > 0)) ;
else return n;
1172 while (left < right)
1177 size_t m = (left + right + 1) / 2;
1178 if (a[m].range.min > min_key)
1186 return a[right].range.min <= min_key ? right : n;
1195 size_t num_db_stripes = db->num_stripes;
1196 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1199 if (start_pos != num_db_stripes) {
1200 assert(db_stripes_sort[start_pos].
range.
min <= query_minmax.
max);
1201 size_t end_pos = start_pos;
1202 while (end_pos < num_db_stripes
1203 && db_stripes_sort[end_pos].
range.
min <= query_minmax.
max)
1207 size_t num_candidates;
1208 if (!db->stripes_do_overlap)
1210 while (start_pos > 0
1211 && (db_stripes_sort[start_pos - 1].
range.
max >= query_minmax.
min))
1213 num_candidates = end_pos - start_pos;
1214 if (candidates->
size < num_candidates) {
1217 * sizeof (candidates->
vec[0]));
1218 candidates->
size = num_candidates;
1220 candidates->
num = num_candidates;
1221 int *restrict candidates_vec = candidates->
vec;
1222 for (
size_t i = 0; i < num_candidates; ++i)
1223 candidates_vec[i] = db_stripes_sort[start_pos + i].
position;
1228 size_t min_candidate = start_pos;
1229 for (
size_t i = end_pos - 1; i != SIZE_MAX; --i)
1232 = ((db_stripes_sort[i].range.min <= query_minmax.
max)
1233 & (db_stripes_sort[i].
range.
max >= query_minmax.
min));
1234 num_candidates += predicate;
1235 size_t predicate_mask = predicate - 1;
1236 min_candidate = (min_candidate & predicate_mask)
1237 | (i & ~predicate_mask);
1239 if (candidates->
size < num_candidates + 1)
1242 (num_candidates + 1)
1243 * sizeof (candidates[0]));
1244 candidates->
size = num_candidates + 1;
1246 candidates->
num = num_candidates;
1247 int *restrict candidates_vec = candidates->
vec;
1249 for (
size_t i = min_candidate; i < end_pos; ++i) {
1250 candidates_vec[j] = db_stripes_sort[i].position;
1251 j += ((size_t)(db_stripes_sort[i].
range.
min <= query_minmax.
max)
1252 & (size_t)(db_stripes_sort[i].
range.
max >= query_minmax.
min));
1254 assert(j == num_candidates);
1258 candidates->
num = 0;
1265 size_t expansion = 0;
1269 struct Xt_stripe *restrict expanded_stripes
1270 =
xmalloc((num_stripes + expansion) *
sizeof (expanded_stripes[0]));
1272 for (
size_t i = 0; i < num_stripes; ++i) {
1274 if (stripe.
stride == 0) {
1275 for (
size_t k = 0; k < (size_t)stripe.
nstrides; ++k)
1281 expanded_stripes[j] = stripe;
1287 (
int)(num_stripes + expansion));
1297 bool single_match_only,
1298 size_t num_candidates,
1299 int *restrict candidates);
1305 const struct Xt_stripe stripes[num_stripes],
1308 int single_match_only)
1310 size_t unmatched = 0;
1318 idxstripes->stripes);
1330 struct int_vec candidates = { .
size = 0, .vec = NULL };
1331 for (
size_t i = 0; i < (size_t)num_stripes; ++i) {
1341 query, &stripes_db, &result, &cover, single_match_only != 0,
1342 candidates.
num, candidates.
vec);
1343 }
while ((stripes[i].
stride == 0) & (--j > 0));
1345 free(candidates.
vec);
1354 free(idxstripes->stripes);
1359 return (
int)unmatched;
1368 size_t num_candidates,
1369 int *restrict candidates);
1383 bool single_match_only,
1384 size_t num_candidates,
1385 int *restrict candidates);
1393 bool single_match_only,
1394 size_t num_candidates,
1395 int *restrict candidates)
1397 size_t unmatched = 0;
1398 if (single_match_only)
1400 query, pos_ext2add, stripes_lookup, result, cover,
1401 num_candidates, candidates);
1415 bool single_match_only,
1416 size_t num_candidates,
1417 int *restrict candidates)
1419 size_t unmatched = 0;
1421 const struct Xt_stripe *restrict db_stripes = db->stripes;
1422 const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1423 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1424 for (
size_t j = 0; j < num_candidates; ++j) {
1425 size_t unsort_pos = (size_t)candidates[j];
1426 size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].
inv_position;
1427 if ((query_minmax.
min <= db_stripes_sort[sort_pos].range.max)
1428 & (query_minmax.
max >= db_stripes_sort[sort_pos].range. min))
1432 struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1442 int skipLen = (int)((overlap_start - query.
start) /
stride);
1451 query_head, db, result, cover, single_match_only,
1452 num_candidates - j - 1, candidates + j + 1);
1458 = (int)((overlap_start - db_stripe.
start) /
stride);
1459 int overlap_nstrides
1465 .stride = overlap_nstrides > 1 ? query.
stride : (
Xt_int)1,
1468 = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1469 .size = overlap_nstrides
1470 }, db, result, cover, single_match_only, num_candidates - j - 1,
1471 candidates + j + 1);
1473 if (!(query.
nstrides -= overlap_nstrides))
1474 goto search_finished;
1476 query.
start = (
Xt_int)(overlap_start + stride * overlap_nstrides);
1487 query, db, result, cover, single_match_only,
1488 num_candidates - j, candidates + j);
1491 goto search_finished;
1511 size_t num_candidates,
1512 int *restrict candidates)
1517 if (pos_ext2add.
size == -1)
1518 pos_ext2add.
size = 1;
1522 int pos_ext2add_s = pos_ext2add.
start
1523 + (querySizeMaskNeg & (pos_ext2add.
size + 1)),
1524 pos_ext2add_e = pos_ext2add.
start
1525 + (~querySizeMaskNeg & (pos_ext2add.
size - 1));
1527 = { .
start = pos_ext2add_s, .end = pos_ext2add_e };
1529 size_t overlap_pos =
1531 if (overlap_pos == SIZE_MAX) {
1535 struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
1537 db_s = pos_exts_[overlap_pos].start
1538 + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
1539 db_e = pos_exts_[overlap_pos].start
1540 + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].size - 1));
1542 int lowQuerySkip = db_s - pos_ext2add_s;
1543 int lowDbSkip = -lowQuerySkip;
1544 lowQuerySkip = (int)((
unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
1545 lowDbSkip = (int)((
unsigned)(lowDbSkip + abs(lowDbSkip))/2);
1546 int overlapLen =
MIN(db_e - db_s - lowDbSkip + 1,
1547 abs(pos_ext2add.
size) - lowQuerySkip);
1548 int highQuerySkip = abs(pos_ext2add.
size) - lowQuerySkip - overlapLen;
1551 int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
1552 | (querySizeMaskNeg & -highQuerySkip),
1553 queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
1554 | (~querySizeMaskNeg & highQuerySkip);
1557 int absQuerySkipLen = abs(querySkipLen);
1565 .size = querySkipLen
1569 query_skip, pos_ext2add_skip, db,
1570 result, cover, num_candidates, candidates);
1571 pos_exts_ = result->pos_ext;
1573 + stride * (
Xt_int)absQuerySkipLen);
1576 pos_ext2add.
start += querySkipLen;
1577 pos_ext2add.
size -= querySkipLen;
1588 query_head, db, result, cover,
true,
1589 num_candidates, candidates);
1590 pos_exts_ = result->pos_ext;
1596 int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
1597 | (querySizeMaskNeg & -overlapLen);
1598 pos_ext2add.
start += directedOverlapLen;
1599 pos_ext2add.
size -= directedOverlapLen;
1622 #if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
1631 bool single_match_only,
1632 size_t num_candidates,
1633 int *restrict candidates)
1635 size_t unmatched = 0;
1636 const struct Xt_stripe *restrict db_stripes = db->stripes;
1637 size_t db_stripe_pos = (size_t)candidates[0];
1639 db_stripes[db_stripe_pos]);
1641 return (
struct unmatched_tail){ .unmatched = 0, .query_tail = query};
1643 int skipped = (int)((overlap.
start - query.start)/query.stride);
1647 (
struct Xt_stripe){ .start = query.start,
1648 .stride = skipped > 1 ? query.stride : (
Xt_int)1,
1649 .
nstrides = skipped}, db, result, cover,
1650 single_match_only, num_candidates - 1, candidates + 1);
1651 query.start = (
Xt_int)(query.start + skipped * query.stride);
1652 query.nstrides -= skipped;
1653 query.stride = query.nstrides > 1 ? query.stride : 1;
1662 = (int)((overlap.
start - db_stripes[db_stripe_pos].start)
1663 / db_stripes[db_stripe_pos].stride);
1664 int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
1665 if (((overlap.
stride == query.stride)
1666 & (overlap.
stride == db_stripes[db_stripe_pos].stride))
1672 db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
1673 query.nstrides -= overlap.
nstrides;
1674 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
1675 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
1677 else if ((overlap.
stride == query.stride)
1678 & (overlap.
stride == -db_stripes[db_stripe_pos].stride))
1683 .start = db_pos, .size = -overlap.
nstrides },
1684 db, result, cover, single_match_only,
1685 num_candidates - 1, candidates + 1);
1686 query.nstrides -= overlap.
nstrides;
1687 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
1688 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
1690 else if (overlap.
stride == query.stride)
1694 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
1697 for (
int i = 0; i < overlap.
nstrides; ++i, db_pos += db_step)
1700 (
struct Xt_pos_ext){ .start = db_pos, .size = 1 },
1701 db, result, cover, single_match_only,
1702 num_candidates - 1, candidates + 1);
1703 query.nstrides -= overlap.
nstrides;
1704 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
1705 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
1713 int stride_step = (int)(overlap.
stride / query.stride);
1714 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
1719 intervening = { .start = (
Xt_int)(query.start + query.stride),
1720 .stride = query.stride,
1721 .nstrides =
MIN(query.nstrides - 1, stride_step - 1) };
1725 .start = db_pos, .size = 1 },
1726 db, result, cover, single_match_only,
1727 num_candidates - 1, candidates + 1);
1729 if (--query.nstrides > 0) {
1732 intervening, db, result, cover, single_match_only,
1733 num_candidates - 1, candidates + 1);
1734 query.nstrides -= intervening.nstrides;
1736 query.start = (
Xt_int)(query.start + query.stride * stride_step);
1737 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
1741 return (
struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
const struct Xt_stripe * stripes
int * stripes_nstrides_psum
const struct Xt_stripes_sort * stripes_sort
struct Xt_stripe_minmax range
struct Xt_stripe query_tail
void(* delete)(Xt_idxlist)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
base definitions header file
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, bool forward, size_t search_start_pos)
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Xt_idxlist xt_idxlist_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ stripes_some_have_zero_stride_mask
@ stripes_do_overlap_mask
@ stripes_some_have_zero_stride_bit
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int compare_xtstripes(const void *a_, const void *b_)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe *stripes, int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only)
static void idxstripes_delete(Xt_idxlist data)
void xt_idxstripes_initialize(void)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static struct extended_gcd extended_gcd(Xt_int a, Xt_int b)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe *stripes, int num_stripes)
#define xt_mpi_call(call, comm)
void(* xt_sort_int)(int *a, size_t n)
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
static struct Xt_stripe_minmax xt_stripe2minmax(struct Xt_stripe stripe)