72 #define MIN(a,b) (((a)<(b))?(a):(b))
109 int * position,
int offset);
113 int num_indices,
int *positions,
114 int single_match_only);
135 .get_positions_of_indices_off = NULL,
138 .get_bounding_box = NULL,
139 .idxlist_pack_code =
VECTOR,
173 die(
"number of indices passed to xt_idxvec_new must not be negative!");
176 size_t vector_size = (size_t)
num_indices *
sizeof (idxvec[0]),
182 Xt_int *vector_assign = (
Xt_int *)(
void *)((
unsigned char *)idxvec_obj + header_size);
183 idxvec_obj->vector = vector_assign;
184 memcpy(vector_assign, idxvec, vector_size);
185 idxvec_obj->sorted_vector = NULL;
186 idxvec_obj->sorted_vec_positions = NULL;
189 return (
void *)idxvec_obj;
196 else if (num_indices == 0)
199 die(
"number of indices passed to xt_idxvec_new must not be negative!");
202 idxvec_obj->vector = idxvec;
203 idxvec_obj->sorted_vector = NULL;
204 idxvec_obj->sorted_vec_positions = NULL;
205 return (
void *)idxvec_obj;
210 int * sorted_vec_pos,
int pos_offset) {
216 start = stripe.
start;
223 for (
int i = 0; i < stripe.
nstrides; ++i) {
225 sorted_vec_pos[i] = pos_offset + i * sign;
231 #define MAX(a,b) ((a) >= (b) ? (a) : (b))
238 if (num_stripes_ <= 0) {
244 size_t num_stripes = (size_t)num_stripes_;
245 Xt_int *restrict sorted_vector_assign
257 Xt_int (*restrict stripe_minmax)[num_stripes]
258 =
xmalloc(2 *
sizeof(*stripe_minmax));
259 int *restrict sorted_stripe_min_pos
260 =
xmalloc(num_stripes * 3 *
sizeof(*sorted_stripe_min_pos));
262 for(
size_t i = 0; i < num_stripes; ++i) {
263 Xt_int ofs = (
Xt_int)(stripes[i].stride * (stripes[i].nstrides - 1)),
265 stripe_minmax[0][i] = (
Xt_int)(stripes[i].start + (ofs & mask));
269 sorted_stripe_min_pos, 1);
271 int *restrict sorted_pos_prefix_sum = sorted_stripe_min_pos + num_stripes,
272 *restrict orig_pos_prefix_sum
273 =
xmalloc(num_stripes *
sizeof(*orig_pos_prefix_sum));
276 for (
size_t i = 0; i < num_stripes; ++i) {
277 orig_pos_prefix_sum[i] = accum;
281 for (
size_t i = 0; i < num_stripes; ++i) {
282 int sorted_pos = sorted_stripe_min_pos[i];
283 sorted_pos_prefix_sum[i] = orig_pos_prefix_sum[sorted_pos];
285 * (stripes[sorted_pos].nstrides - 1)),
287 stripe_minmax[1][i] = (
Xt_int)(stripes[sorted_pos].start + (ofs & ~mask));
290 free(orig_pos_prefix_sum);
296 int *restrict overlap_count
297 = sorted_stripe_min_pos + 2 * num_stripes;
298 for (
size_t i = 0; i < num_stripes - 1; ++i) {
299 bool do_overlap = stripe_minmax[1][i] >= stripe_minmax[0][i + 1];
305 Xt_int range_max_idx =
MAX(stripe_minmax[1][i], stripe_minmax[1][i+1]);
306 while (j + 1 < num_stripes
307 && stripe_minmax[0][j + 1] <= range_max_idx) {
308 range_max_idx =
MAX(range_max_idx, stripe_minmax[1][j+1]);
311 overlap_count[i] = (int)(j - i);
314 while (j + 1 < num_stripes
315 && stripe_minmax[0][j + 1] > stripe_minmax[1][j])
317 overlap_count[i] = -(int)(j - i - 1);
321 overlap_count[num_stripes - 1] = 0;
326 while (i < num_stripes) {
328 bool do_overlap = overlap_count[i] > 0;
329 size_t num_selection = (size_t)(abs(overlap_count[i])) + 1;
332 for (
size_t j = 0; j < num_selection; ++j)
333 sel_size +=
decode_stripe(stripes[sorted_stripe_min_pos[i+j]],
334 sorted_vector_assign + offset + sel_size,
336 sorted_pos_prefix_sum[i+j]);
347 free(sorted_stripe_min_pos);
357 long long num_indices = 0;
359 for (
int i = 0; i < num_stripes; ++i)
360 num_indices += stripes[i].nstrides;
361 assert((
sizeof (
long long) >
sizeof (
int)) & (num_indices <= INT_MAX)
362 & (num_indices >= 0));
365 if (num_indices > 0) {
366 size_t vector_size = (size_t)num_indices *
sizeof (
Xt_int),
371 = (
Xt_int *)(
void *)((
unsigned char *)idxvec_obj + header_size);
372 idxvec_obj->
vector = indices;
374 size_t k = (size_t)-1;
375 for (
int i = 0; i < num_stripes; ++i)
376 for (
int j = 0; j < stripes[i].
nstrides; ++j)
399 int size_xt_idx, size_header;
401 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
403 &size_xt_idx), comm);
405 return (
size_t)size_xt_idx + (size_t)size_header;
415 buffer_size, position, comm), comm);
420 buffer_size, position, comm), comm);
428 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
429 &num_indices, 1, MPI_INT, comm), comm);
431 size_t vector_size = (size_t)num_indices *
sizeof (
Xt_int),
437 Xt_int *vector_assign = (
Xt_int *)(
void *)((
unsigned char *)idxvec + header_size);
438 idxvec->
vector = vector_assign;
439 if (num_indices != 0) {
440 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
441 vector_assign, num_indices,
444 fputs(
"warning: implementation generated empty vector!\n", stderr);
468 for (
size_t i = 1; i < num_indices; ++i)
474 for (
size_t i = 0; i < num_indices; ++i)
478 size_t svec_size = num_indices *
sizeof(
Xt_int);
500 size_t num_indices_inter = 0,
502 num_indices_dst = (
size_t)idxvec_dst->parent.num_indices;
504 size_t vector_size = num_indices_dst *
sizeof (idxvec_dst->vector[0]),
511 = (
Xt_int *)(
void *)((
unsigned char *)inter_vector + header_size);
512 inter_vector->
vector = vector_assign;
520 for (
size_t i = 0, j = 0; i < num_indices_dst; ++i) {
522 while (j < num_indices_src &&
523 sorted_src_vector[j] < sorted_dst_vector[i]) ++j;
524 if (j >= num_indices_src)
break;
525 if (sorted_src_vector[j] == sorted_dst_vector[i])
526 vector_assign[num_indices_inter++] = sorted_dst_vector[i];
529 if (num_indices_inter) {
530 vector_size = (size_t)num_indices_inter *
sizeof (idxvec_dst->vector[0]);
531 inter_vector =
xrealloc(inter_vector, header_size + vector_size);
533 = (
Xt_int *)(
void *)((
unsigned char *)inter_vector + header_size);
559 memcpy(indices, idxvec_obj->
vector,
579 stripes, num_stripes);
590 *index = idxvec_obj->
vector[position];
597 const int *restrict positions,
598 int num_pos_,
Xt_int *index,
606 size_t num_pos = num_pos_ >= 0 ? (
size_t)num_pos_ : (size_t)0;
607 for (
size_t ip = 0; ip < num_pos; ip++) {
608 int p = positions[ip];
609 if (p >= 0 && (
size_t)p < num_indices) {
612 index[ip] = undef_idx;
625 int * position,
int offset) {
632 if ((offset < 0) || ((size_t)offset >= num_indices))
643 size_t ub = num_indices - 1;
647 size_t middle = (ub + lb + 1)/2;
651 else if (ub == middle)
661 while (lb < num_indices - 1 &&
683 for (
size_t i = 1; i < n; i++)
684 if (idx[i] < idx[i-1])
return false;
691 const Xt_int *selection_idx,
692 int num_selection,
int *positions,
693 int single_match_only) {
695 if (num_selection <= 0)
return 0;
697 bool selection_is_ordered =
idx_vec_is_sorted(selection_idx, (
size_t)num_selection);
700 Xt_int const *sorted_selection;
701 int *sorted_selection_pos = NULL;
704 if (selection_is_ordered) {
705 sorted_selection = selection_idx;
707 size_t idx_memsize = (size_t)num_selection *
sizeof(*sorted_selection),
708 pos_memsize = (size_t)num_selection *
sizeof(*sorted_selection_pos),
709 #if defined _CRAYC && _RELEASE_MAJOR < 9
710 pos_ofs_roundup = _MAXVL_8,
712 pos_ofs_roundup =
sizeof(int),
715 pos_ofs = ((idx_memsize + pos_ofs_roundup - 1)
716 & ((size_t)-(ssize_t)(pos_ofs_roundup))),
718 alloc_size = pos_ofs + pos_memsize;
721 memcpy(tmp_idx, selection_idx, idx_memsize);
724 = (
void *)((
unsigned char *)tmp_idx + pos_ofs);
726 sorted_selection = tmp_idx;
737 int num_unmatched = 0;
740 size_t post_match_step = single_match_only != 0;
743 for (
size_t search_start = 0, ub_guess_ofs = 1;
744 i < (size_t)num_selection && search_start<=search_end;
746 size_t selection_pos = selection_is_ordered ? i : (size_t)sorted_selection_pos[i];
747 Xt_int isel = sorted_selection[i];
749 size_t ub =
MIN(search_start + ub_guess_ofs, search_end);
750 size_t lb = search_start;
752 if (sorted_body[ub] < isel) {
753 lb =
MIN(ub + 1, search_end);
759 size_t middle = (ub + lb + 1) / 2;
761 if (sorted_body[middle] <= isel)
767 while (sorted_body[lb] < isel && lb < ub)
771 if (isel == sorted_body[lb]) {
775 positions[selection_pos] = -1;
780 while (match_pos > search_start && sorted_body[match_pos-1] == isel)
785 positions[selection_pos] = sorted_body_pos[match_pos];
786 ub_guess_ofs = match_pos - search_start;
787 search_start = match_pos + post_match_step;
789 if (i < (
size_t)num_selection) {
790 num_unmatched += (int)((
size_t)num_selection - i);
791 if (selection_is_ordered)
794 }
while (++i < (
size_t)num_selection);
797 positions[sorted_selection_pos[i]] = -1;
798 }
while (++i < (
size_t)num_selection);
800 if (tmp_idx) free(tmp_idx);
802 return num_unmatched;
812 die(
"idxvec_get_min_index: empty index vector");
825 die(
"idxvec_get_max_index: empty index vector");
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
void xt_quicksort_index(Xt_int *v_idx, int n, int *v_pos, int reset_pos)
void xt_quicksort_xt_int_permutation(Xt_int *a, size_t n, int *restrict permutation)
const Xt_int * sorted_vector
struct Xt_idxlist_ parent
int * sorted_vec_positions
void(* delete)(Xt_idxlist)
static Xt_int Xt_isign_mask(Xt_int x)
base definitions header file
struct Xt_idxlist_ * Xt_idxlist
Xt_idxlist xt_idxempty_new(void)
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
static Xt_int idxvec_get_min_index(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_prealloc_new(const Xt_int *idxvec, int num_indices)
static int idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxvec_delete(Xt_idxlist data)
static int idxvec_get_positions_of_indices(Xt_idxlist idxlist, const Xt_int *indices, int num_indices, int *positions, int single_match_only)
static void idxvec_get_indices(Xt_idxlist idxlist, Xt_int *indices)
Xt_idxlist xt_idxvec_new(const Xt_int *idxvec, int num_indices)
static void idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
static Xt_idxlist idxvec_copy(Xt_idxlist idxlist)
static int idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static void idxvec_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxvec_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static bool idx_vec_is_sorted(Xt_int const *idx, size_t n)
static int idxvec_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static int idxvec_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static const struct xt_idxlist_vtable idxvec_vtable
static Xt_int const * idxvec_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxvec_ * Xt_idxvec
static const Xt_int * get_sorted_vector(Xt_idxvec idxvec)
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe stripes[], int num_stripes)
static Xt_int idxvec_get_max_index(Xt_idxlist idxlist)
static size_t decode_stripe(struct Xt_stripe stripe, Xt_int *sorted_vector, int *sorted_vec_pos, int pos_offset)
static void generate_sorted_vector_from_stripes(const struct Xt_stripe stripes[], int num_stripes_, Xt_idxvec idxvec)
Xt_idxlist xt_idxvec_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
Xt_idxlist xt_idxvec_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
#define xt_mpi_call(call, comm)
void xt_convert_indices_to_stripes(const Xt_int *indices, int num_indices, struct Xt_stripe **stripes, int *num_stripes)