104 int * position,
int offset);
107 int num_selection,
int *positions,
108 int single_match_only);
125 .get_indices_at_positions = NULL,
129 .get_positions_of_indices_off = NULL,
132 .get_bounding_box = NULL,
171 MPI_Aint base_address, local_size_address;
173 MPI_Get_address(&
dim_desc, &base_address);
176 enum { num_dt_components = 2 };
177 int block_lengths[num_dt_components] = { 4, 1 };
178 MPI_Aint displacements[num_dt_components]
179 = {0, local_size_address - base_address };
180 MPI_Datatype types[num_dt_components]
182 dim_desc_dt_unaligned;
183 xt_mpi_call(MPI_Type_create_struct(num_dt_components,
184 block_lengths, displacements, types,
185 &dim_desc_dt_unaligned), Xt_default_comm);
186 xt_mpi_call(MPI_Type_create_resized(dim_desc_dt_unaligned, 0,
189 xt_mpi_call(MPI_Type_free(&dim_desc_dt_unaligned), Xt_default_comm);
211 if (num_dimensions > 0) {
212 idxsection =
xmalloc(
sizeof (*idxsection)
213 + (
size_t)num_dimensions *
214 sizeof (idxsection->
dims[0]));
217 idxsection->
ndim = num_dimensions;
221 for (
int i = 0; i < num_dimensions; ++i) {
230 if (num_indices == 0) {
243 for (
int i = num_dimensions - 2; i >= 0; --i) {
258 for (
int i = num_dimensions - 1; i >= 0; --i) {
281 for (
int i = 0; i < num_dimensions; ++i) {
304 if (data == NULL)
return;
317 int size_header, size_dim_descs, size_xt_int;
319 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
322 comm, &size_dim_descs), comm);
324 return (
size_t)size_header + (size_t)size_dim_descs
325 + (
size_t)size_xt_int;
341 buffer_size, position, comm), comm);
343 buffer_size, position, comm), comm);
345 buffer, buffer_size, position, comm), comm);
356 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, &ndim, 1, MPI_INT,
359 =
xmalloc(
sizeof (*section) + (
size_t)ndim *
sizeof(section->
dims[0]));
360 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
363 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
368 section->
ndim = ndim;
370 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
377 for (
int i = 0; i < ndim; ++i) {
407 INSTR_DEF(instr,
"idxsection_get_intersection_with_other_idxlist")
414 int single_match_only = 0;
417 for (i = 1; i < num_dst_idx; ++i)
418 if (dst_idx[i] < dst_idx[i-1])
421 Xt_int const * sorted_dst_idx;
422 Xt_int * temp_dst_idx = NULL;
425 if (num_dst_idx > 1 && i != num_dst_idx) {
427 temp_dst_idx =
xmalloc((
size_t)num_dst_idx *
sizeof(*temp_dst_idx));
428 memcpy(temp_dst_idx, dst_idx, (
size_t)num_dst_idx *
sizeof(*temp_dst_idx));
432 sorted_dst_idx = temp_dst_idx;
434 sorted_dst_idx = dst_idx;
436 pos =
xmalloc((
size_t)num_dst_idx *
sizeof(*pos));
438 src_idxsection, sorted_dst_idx, num_dst_idx, pos,
440 int num_inter_idx = num_dst_idx - num_unmatched;
442 if (num_inter_idx != 0) {
444 *
sizeof(*intersection));
446 for(i = 0, j = 0; i < num_dst_idx && j < num_inter_idx; i++) {
447 intersection[j] = sorted_dst_idx[i];
467 INSTR_DEF(instr,
"idxsection_get_intersection.part")
476 if (idxsection_src->
ndim != idxsection_dst->
ndim ||
484 for (i = 1; i < idxsection_src->
ndim; ++i)
488 idxlist_src, idxlist_dst);
503 for (i = 0; i < idxsection_src->
ndim; ++i) {
505 Xt_int src_start, src_end, dst_start, dst_end, local_end;
526 src_end = (
Xt_int)(src_start
528 dst_end = (
Xt_int)(dst_start
531 local_start[i] = (src_start > dst_start)?src_start:dst_start;
532 local_end = (src_end > dst_end)?dst_end:src_end;
564 int num_dimensions = src->
ndim;
567 + (
size_t)num_dimensions
568 *
sizeof (idxsection->
dims[0]));
572 memcpy(idxsection->
dims, src->
dims, (
size_t)num_dimensions *
573 sizeof (src->
dims[0]));
584 for (i = 0; i < section->
ndim; ++i)
586 assert(size <= INT_MAX);
594 int ndim,
struct dim_desc dims[ndim])
602 for (
int i = 0; i < abs_local_size; ++i)
603 indices[i] = (
Xt_int)(start_index + i);
605 for (
int i = 0; i < abs_local_size; ++i)
606 indices[i] = (
Xt_int)(start_index - i);
607 return abs_local_size;
611 int indices_written = 0, overflow = 0;
613 for (
int dim_ofs = 0; dim_ofs < abs_local_size; ++dim_ofs)
615 int indices_written_temp
619 indices + indices_written,
621 overflow |= (indices_written_temp > INT_MAX - indices_written);
622 indices_written += indices_written_temp;
625 return indices_written;
631 INSTR_DEF(instr,
"idxsection_get_indices")
637 if (num_indices > 0) {
642 (
size_t)num_indices *
sizeof(*indices));
657 (
size_t)num_indices *
sizeof(*indices));
679 INSTR_DEF(instr,
"idxsection_get_index_stripes.part")
683 int ndim = section->
ndim;
686 size_t nstripes = dims[ndim-1].
local_size != 0;
688 for (
int i = 0; i < ndim-1; ++i)
692 *num_stripes = (int)nstripes;
699 if ((
size_t)*num_stripes < nstripes)
700 p =
xrealloc(p, nstripes *
sizeof(**stripes));
702 enum { curr_local_position_auto_size=16 };
703 Xt_int curr_local_position_auto[curr_local_position_auto_size];
704 Xt_int *restrict curr_local_position;
705 if (ndim-2 <= curr_local_position_auto_size) {
706 curr_local_position = curr_local_position_auto;
707 for (
int i = 0; i < ndim-1; ++i)
708 curr_local_position[i] = 0;
711 =
xcalloc((
size_t)(ndim-2),
sizeof(*curr_local_position));
713 for (
size_t i = 0; i < nstripes; ++i) {
716 p[i].nstrides = abs(dims[ndim-1].local_size);
719 for (
int j = 0; j < ndim - 1; ++j)
721 + curr_local_position[j]
722 * dims[j].global_stride);
724 for (
int j = ndim - 2; j >= 0; --j)
725 if (curr_local_position[j] < abs(dims[j].local_size) - 1) {
726 curr_local_position[j]++;
729 curr_local_position[j] = 0;
732 *num_stripes = (int)nstripes;
733 if (curr_local_position != curr_local_position_auto)
734 free(curr_local_position);
745 if (position < 0)
return 1;
752 Xt_int curr_local_position;
753 long long pos = (
long long)position;
755 for (dim = 0; dim < section->
ndim; ++dim) {
762 temp_index = (
Xt_int)(temp_index
763 + curr_local_position
778 INSTR_DEF(instr,
"idxsection_get_position_of_index.part")
783 if (index < section->min_index_cache || index > section->
max_index_cache)
794 int temp_position = 0;
796 for (i = 0; i < section->
ndim; ++i) {
801 Xt_int curr_global_position
802 = (
Xt_int)(index / abs_global_stride);
810 index = (
Xt_int)(index % abs_global_stride);
812 if (curr_global_position < section->dims[i].local_start)
815 Xt_int curr_local_position
826 temp_position += (int)(curr_local_position * section->
dims[i].
local_stride);
829 *position = temp_position;
840 const Xt_int selection_idx[],
843 int single_match_only) {
845 INSTR_DEF(instr,
"idxsection_get_positions_of_indices_v1.part")
847 if (num_selection < 1)
return 0;
849 if (num_selection == 1)
853 int num_unmatched = 0;
855 if (!single_match_only) {
857 for (
int i = 0; i < num_selection; ++i)
861 return num_unmatched;
866 for (
size_t i = 1; i < (size_t)num_selection; ++i)
867 if (selection_idx[i] < selection_idx[i-1])
875 for (
size_t i = 0; i < (size_t)num_selection; i++) {
877 Xt_int curr_index = selection_idx[i];
879 if (prev_index != curr_index) {
884 prev_index = curr_index;
900 for (
size_t i = 0; i < (size_t)num_selection; i++) {
901 v[i].
idx = selection_idx[i];
906 for (
size_t i = 0; i < (size_t)num_selection; i++) {
924 return num_unmatched;
929 const Xt_int selection_idx[],
930 int num_selection,
int positions[],
931 int single_match_only) {
933 INSTR_DEF(instr,
"idxsection_get_positions_of_indices_v2.part")
935 if (num_selection < 1)
return 0;
937 if (num_selection == 1)
942 Xt_int * temp_selection_idx = NULL;
943 const Xt_int *restrict sorted_selection_idx;
944 int * selection_pos = NULL;
947 for (i = 1; i < (size_t)num_selection; ++i)
948 if (selection_idx[i] < selection_idx[i-1])
951 sorted_selection_idx = selection_idx;
956 =
xmalloc((
size_t)num_selection *
sizeof(*temp_selection_idx));
957 memcpy(temp_selection_idx, selection_idx,
958 (
size_t)num_selection *
sizeof(*temp_selection_idx));
959 selection_pos =
xmalloc((
size_t)num_selection *
sizeof(*selection_pos));
962 sorted_selection_idx = temp_selection_idx;
987 if (!single_match_only) {
989 if (selection_pos == NULL) {
991 for (
size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
993 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
995 if (j >= num_body_indices)
break;
997 positions[i] = (body_indices[j] == sorted_selection_idx[i])?(
int)j:-1;
1001 for (
size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1003 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1005 if (j >= num_body_indices)
break;
1007 positions[selection_pos[i]] = (body_indices[j] == sorted_selection_idx[i])?(
int)j:-1;
1012 Xt_int last_idx = (
Xt_int)(sorted_selection_idx[0] - 1);
1014 if (selection_pos == NULL) {
1016 for (
size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1018 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1020 if (j >= num_body_indices)
break;
1022 positions[i] = ((last_idx == sorted_selection_idx[i]) ||
1023 (body_indices[j] != sorted_selection_idx[i]))?-1:(
int)j;
1025 last_idx = sorted_selection_idx[i];
1029 for (
size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1031 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1033 if (j >= num_body_indices)
break;
1035 positions[selection_pos[i]] = ((last_idx == sorted_selection_idx[i]) ||
1036 (body_indices[j] != sorted_selection_idx[i]))?-1:(
int)j;
1038 last_idx = sorted_selection_idx[i];
1044 if (selection_pos == NULL)
1045 for (; i < (size_t)num_selection; ++i)
1048 for (; i < (size_t)num_selection; ++i)
1049 positions[selection_pos[i]] = -1;
1051 free(temp_selection_idx);
1052 free(selection_pos);
1054 int num_unmatched = 0;
1057 for (
size_t j = 0; j < (size_t)num_selection; ++j)
1058 num_unmatched += positions[j] == -1;
1061 return num_unmatched;
1066 int position_offset,
1073 size_t num_processed = 0;
1075 Xt_int abs_global_size = XT_INT_ABS(dims[0].global_size);
1076 int abs_local_size = abs(dims[0].local_size);
1077 Xt_int abs_global_stride = XT_INT_ABS(dims[0].global_stride);
1078 Xt_int abs_local_stride = XT_INT_ABS(dims[0].local_stride);
1085 Xt_int tmp_local_start = dims[0].local_start;
1089 if (dims[0].global_size < 0)
1090 tmp_local_start = (
Xt_int)(-dims[0].global_size - tmp_local_start -
1093 Xt_int min_index = (
Xt_int)(index_offset + tmp_local_start * abs_global_stride);
1096 while ((num_processed < num_indices)
1097 && (indices[num_processed] < min_index))
1098 positions[num_processed++] = -1;
1101 if ((dims[0].global_stride < 0) ^ (dims[0].local_stride < 0)) {
1105 while ((num_processed < num_indices) &&
1106 ((curr_position = (
Xt_int)(indices[num_processed] - min_index)) <
1109 positions[num_processed++] = position_offset
1110 + (int)(abs_local_size - curr_position - 1);
1116 while ((num_processed < num_indices) &&
1117 ((curr_position = (
Xt_int)(indices[num_processed] - min_index)) <
1120 positions[num_processed++] = position_offset + (int)curr_position;
1126 while ((num_processed < num_indices) &&
1127 (indices[num_processed] < index_offset + abs_global_size))
1128 positions[num_processed++] = -1;
1134 Xt_int tmp_local_start = dims[0].local_start;
1138 if (dims[0].global_size < 0)
1139 tmp_local_start = (
Xt_int)(-dims[0].global_size - tmp_local_start -
1143 = (
Xt_int)(index_offset + tmp_local_start * abs_global_stride);
1146 while ((num_processed < num_indices)
1147 && (indices[num_processed] < min_index))
1148 positions[num_processed++] = -1;
1151 while (num_processed < num_indices) {
1153 Xt_int curr_global_position, curr_local_position;
1156 curr_global_position = (
Xt_int)((indices[num_processed] - index_offset) /
1160 if (curr_global_position >= tmp_local_start + abs_local_size)
1164 if ((dims[0].global_size < 0) ^ (dims[0].local_size < 0))
1166 curr_local_position = (
Xt_int)(abs_local_size - curr_global_position
1167 + tmp_local_start - 1);
1169 curr_local_position = (
Xt_int)(curr_global_position - tmp_local_start);
1172 = (
Xt_int)(index_offset + curr_global_position * abs_global_stride);
1175 int position_offset_ = (int)(curr_local_position * abs_local_stride);
1178 curr_index_offset, position_offset_, indices + num_processed,
1179 num_indices - num_processed, positions + num_processed, ndim-1,
1184 return num_processed;
1189 const Xt_int *restrict selection_idx,
1191 int *restrict positions,
1192 int single_match_only) {
1194 INSTR_DEF(instr,
"idxsection_get_positions_of_indices_v3.part")
1195 INSTR_DEF(instr2,
"idxsection_get_positions_of_indices_recursive")
1199 if (num_selection < 1)
return 0;
1201 if (num_selection == 1)
1206 const Xt_int * restrict sorted_selection_idx;
1207 Xt_int *temp_selection_idx = NULL;
1208 int *sorted_positions;
1209 int *selection_pos = NULL;
1211 for (
size_t i = 1; i < (size_t)num_selection; ++i)
1212 if (selection_idx[i] < selection_idx[i-1])
1213 goto unsorted_selection;
1215 sorted_selection_idx = selection_idx;
1216 sorted_positions = positions;
1217 goto sorted_selection;
1221 =
xmalloc((
size_t)num_selection *
sizeof(*temp_selection_idx));
1223 size_t num_sp_alloc = (size_t)num_selection;
1224 #if defined _CRAYC && _RELEASE_MAJOR < 9
1225 num_sp_alloc = (num_sp_alloc + _MAXVL_32 - 1) & ~(_MAXVL_32 - 1);
1227 size_t total_alloc = num_sp_alloc + (size_t)num_selection;
1229 =
xmalloc(total_alloc *
sizeof(*sorted_positions));
1230 selection_pos = sorted_positions + num_sp_alloc;
1232 memcpy(temp_selection_idx, selection_idx,
1233 (
size_t)num_selection *
sizeof(*temp_selection_idx));
1236 sorted_selection_idx = temp_selection_idx;
1241 size_t num_processed
1244 0, sorted_selection_idx, (
size_t)num_selection,
1245 sorted_positions, section->
ndim,
1251 for (
size_t i = num_processed; i < (size_t)num_selection; ++i)
1252 sorted_positions[i] = -1;
1255 if (single_match_only)
1256 for (
size_t i = 1; i < num_processed; ++i)
1257 if (sorted_selection_idx[i] == sorted_selection_idx[i-1])
1258 sorted_positions[i] = -1;
1261 if (sorted_selection_idx != selection_idx) {
1263 for (
size_t i = 0; i < (size_t)num_selection; ++i)
1264 positions[i] = sorted_positions[selection_pos[i]];
1266 free(sorted_positions);
1267 free(temp_selection_idx);
1271 size_t num_unmatched = (size_t)num_selection - num_processed;
1273 for (
size_t i = 0; i < num_processed; ++i)
1274 num_unmatched += positions[i] == -1;
1278 return (
int)num_unmatched;
1283 const Xt_int *selection_idx,
1284 int num_selection,
int * positions,
1285 int single_match_only) {
1287 INSTR_DEF(instr,
"idxsection_get_positions_of_indices")
1296 for (
int i = 0; i < section->
ndim; ++i)
1299 num_selection, positions,
1313 (((
size_t)num_section_indices *
sizeof(
Xt_int)
1314 <= (
size_t)128 * 1024U * 1024U)
1315 && ((
Xt_int)num_section_indices <= 1000 * num_selection))) {
1317 num_selection, positions,
1323 num_selection, positions,
1336 int * position,
int offset) {
1343 if (temp_position < offset)
1346 *position = temp_position;
void xt_mergesort_index(Xt_int *val, int n, int *pos, int reset_pos)
void xt_mergesort_idxpos(idxpos_type *restrict v, size_t n)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
#define xcalloc(nmemb, size)
void xt_quicksort_index(Xt_int *v_idx, int n, int *v_pos, int reset_pos)
Xt_int * index_array_cache
Xt_int global_start_index
struct Xt_idxlist_ parent
void(* delete)(Xt_idxlist)
static Xt_int Xt_isign(Xt_int x)
Xt_idxlist xt_idxempty_new(void)
int xt_idxlist_get_num_indices(Xt_idxlist idxlist)
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
Xt_idxlist xt_default_isect(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
static int idxsection_get_positions_of_indices(Xt_idxlist body_idxlist, Xt_int const *selection_idx, int num_selection, int *positions, int single_match_only)
Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions, const Xt_int global_size[num_dimensions], const int local_size[num_dimensions], const Xt_int local_start[num_dimensions])
Xt_idxlist xt_idxsection_get_intersection_with_other_idxlist(Xt_idxlist src_idxsection, Xt_idxlist dst_idxlist)
static Xt_idxlist idxsection_copy(Xt_idxlist idxlist)
static int idxsection_get_positions_of_indices_v3(Xt_idxlist body_idxlist, const Xt_int *restrict selection_idx, int num_selection, int *restrict positions, int single_match_only)
Xt_idxlist xt_idxsection_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static const struct xt_idxlist_vtable idxsection_vtable
void xt_idxsection_initialize(void)
static MPI_Datatype dim_desc_dt
static void idxsection_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
void xt_idxsection_finalize(void)
static int idxsection_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static int idxsection_get_positions_of_indices_v2(Xt_idxlist body_idxlist, const Xt_int selection_idx[], int num_selection, int positions[], int single_match_only)
static Xt_int idxsection_get_max_index(Xt_idxlist idxlist)
static void idxsection_delete(Xt_idxlist data)
Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static int idxsection_get_indices_any(Xt_int start_index, Xt_int *indices, int ndim, struct dim_desc dims[ndim])
static int idxsection_get_num_indices(Xt_idxsection section)
static int idxsection_get_positions_of_indices_v1(Xt_idxlist body_idxlist, const Xt_int selection_idx[], int num_selection, int positions[], int single_match_only)
static void idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxsection_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static size_t idxsection_get_positions_of_indices_recursive(Xt_int index_offset, int position_offset, const Xt_int indices[], size_t num_indices, int positions[], int ndim, struct dim_desc dims[ndim])
static int idxsection_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static int idxsection_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
struct Xt_idxsection_ * Xt_idxsection
static Xt_int idxsection_get_min_index(Xt_idxlist idxlist)
static void idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static const Xt_int * idxsection_get_indices_const(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_new(const Xt_int *idxlist, int num_indices)
#define xt_mpi_call(call, comm)