73 int count, MPI_Datatype old_type);
76 MPI_Datatype old_type);
82 MPI_Comm_rank(comm, &rank);
84 char error_string[MPI_MAX_ERROR_STRING];
85 int length_of_error_string, error_class;
87 MPI_Error_class(error_code, &error_class);
88 MPI_Error_string(error_class, error_string, &length_of_error_string);
89 fprintf(stderr,
"%3d: %s\n", rank, error_string);
90 MPI_Error_string(error_code, error_string, &length_of_error_string);
91 fprintf(stderr,
"%3d: %s\n", rank, error_string);
92 MPI_Abort(comm, error_code);
96 static MPI_Datatype copy_mpi_datatype(MPI_Datatype old_type,
MPI_Comm comm) {
98 MPI_Datatype datatype;
100 xt_mpi_call(MPI_Type_dup(old_type, &datatype), comm);
106 gen_mpi_datatype_simple(
int displacement, MPI_Datatype old_type,
MPI_Comm comm)
108 MPI_Datatype datatype;
110 xt_mpi_call(MPI_Type_create_indexed_block(1, 1, &displacement, old_type,
118 gen_mpi_datatype_contiguous(
int displacement,
int blocklength,
119 MPI_Datatype old_type,
MPI_Comm comm) {
121 MPI_Datatype datatype;
123 if (displacement == 0)
124 xt_mpi_call(MPI_Type_contiguous(blocklength, old_type, &datatype),
127 xt_mpi_call(MPI_Type_create_indexed_block(1, blocklength,
128 &displacement, old_type,
138 gen_mpi_datatype_vector(
int count,
int blocklength,
int stride,
139 int offset, MPI_Datatype old_type,
MPI_Comm comm) {
141 MPI_Datatype datatype;
143 xt_mpi_call(MPI_Type_vector(count, blocklength, stride, old_type,
147 MPI_Datatype datatype_;
148 int hindexed_blocklength = 1;
149 MPI_Aint old_type_size, old_type_lb;
151 xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
152 &old_type_size), comm);
154 MPI_Aint displacement = offset * old_type_size;
156 xt_mpi_call(MPI_Type_create_hindexed(1, &hindexed_blocklength,
157 &displacement, datatype, &datatype_),
160 datatype = datatype_;
168 gen_mpi_datatype_indexed_block(
int const * displacements,
int blocklength,
169 int count, MPI_Datatype old_type,
MPI_Comm comm)
171 MPI_Datatype datatype;
173 xt_mpi_call(MPI_Type_create_indexed_block(count, blocklength,
174 (
void *)displacements,
175 old_type, &datatype), comm);
182 gen_mpi_datatype_indexed(
const int *displacements,
const int *blocklengths,
183 int count, MPI_Datatype old_type,
MPI_Comm comm) {
185 MPI_Datatype datatype;
187 xt_mpi_call(MPI_Type_indexed(count, (
int*)blocklengths, (
void*)displacements,
188 old_type, &datatype), comm);
195 check_for_vector_type(
const int *displacements,
const int *blocklengths,
198 int blocklength = blocklengths[0];
200 for (
int i = 1; i < count; ++i)
201 if (blocklengths[i] != blocklength)
204 int stride = displacements[1] - displacements[0];
206 for (
int i = 1; i + 1 < count; ++i)
207 if (displacements[i+1] - displacements[i] != stride)
213 static inline int check_for_indexed_block_type(
const int *blocklengths,
216 int blocklength = blocklengths[0];
218 for (
int i = 1; i < count; ++i)
219 if (blocklengths[i] != blocklength)
228 const int *blocklengths,
229 int count, MPI_Datatype old_type,
238 MPI_Datatype datatype;
241 datatype = MPI_DATATYPE_NULL;
242 else if (count == 1 && blocklengths[0] == 1 && displacements[0] == 0)
243 datatype = copy_mpi_datatype(old_type, comm);
244 else if (count == 1 && blocklengths[0] == 1)
245 datatype = gen_mpi_datatype_simple(displacements[0], old_type, comm);
247 datatype = gen_mpi_datatype_contiguous(displacements[0], blocklengths[0],
249 else if (check_for_vector_type(displacements, blocklengths, count))
250 datatype = gen_mpi_datatype_vector(count, blocklengths[0],
251 displacements[1] - displacements[0],
252 displacements[0], old_type, comm);
253 else if (check_for_indexed_block_type(blocklengths, count))
254 datatype = gen_mpi_datatype_indexed_block(displacements, blocklengths[0],
255 count, old_type, comm);
257 datatype = gen_mpi_datatype_indexed(displacements, blocklengths, count,
265 MPI_Datatype old_type,
MPI_Comm comm) {
270 return MPI_DATATYPE_NULL;
276 int * blocklengths =
xmalloc((
size_t)count *
sizeof(*blocklengths));
282 while (i + j < count && displacements[i] + j == displacements[i + j])
284 blocklengths[new_count++] = j;
289 int * tmp_displ = NULL;
292 if (new_count != count) {
294 tmp_displ =
xmalloc((
size_t)new_count *
sizeof(*tmp_displ));
298 for (
int i = 0; i < new_count; ++i) {
300 tmp_displ[i] = displacements[offset];
301 offset += blocklengths[i];
306 displ = displacements;
308 MPI_Datatype datatype;
325 if (disp_len<1)
return 0;
329 for (
size_t p = 1; p < disp_len; ++p) {
330 int new_stride = disp[p] - disp[p-1];
334 }
else if (new_stride == x.
stride) {
357 }
else if (x.
size == 2) {
367 v[i].start = x.
start;
378 MPI_Datatype old_type,
int *disp, MPI_Datatype *dt) {
382 if (p >= vlen)
return 0;
383 int nstrides = v[p].
size;
385 if (nstrides < 2 || stride == 1 )
return 0;
389 *disp = vlen > 1 ? v[p].
start : 0;
392 xt_mpi_call(MPI_Type_vector(nstrides, 1, stride, old_type, &dt1),
395 int start = v[p].
start - *disp;
400 MPI_Aint old_type_size, old_type_lb;
401 xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
402 &old_type_size), Xt_default_comm);
404 MPI_Aint displacement = start * old_type_size;
407 xt_mpi_call(MPI_Type_create_hindexed(1, &bl2, &displacement, dt1, &dt2),
422 MPI_Datatype old_type,
int *disp, MPI_Datatype *dt) {
424 size_t p = *pstart_, pstart = p;
425 if (p+2 >= vlen || v[p].stride != 1 || v[p+1].stride != 1 )
return 0;
427 if (bl < 1 || v[p+1].size != bl)
return 0;
432 while( p < vlen && v[p].stride == 1 && v[p].size == bl &&
433 v[p].start - v[p-1].start == vstride ) {
436 size_t n = p - pstart;
437 if (n<3)
return false;
439 *disp = n == vlen ? 0 : v[pstart].
start;
442 xt_mpi_call(MPI_Type_vector((
int)n, bl, vstride, old_type, &dt1),
445 int start = v[pstart].
start - *disp;
452 MPI_Aint old_type_size, old_type_lb;
453 xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
454 &old_type_size), Xt_default_comm);
456 MPI_Aint displacement = start * old_type_size;
459 xt_mpi_call(MPI_Type_create_hindexed(1, &bl2, &displacement, dt1, &dt2),
469 MPI_Datatype old_type,
int *restrict disp, MPI_Datatype *dt) {
471 if (p >= vlen || v[p].stride != 1 || v[p].size < 2)
return 0;
473 int d = v[p].
start - (*disp = vlen > 1 ? v[p].
start : 0);
476 xt_mpi_call(MPI_Type_contiguous(v[p].size, old_type, dt), Xt_default_comm) ;
478 xt_mpi_call(MPI_Type_create_indexed_block(1, v[p].size, &d, old_type, dt),
482 return v[p].
size != 0;
487 MPI_Datatype old_type,
int *disp, MPI_Datatype *dt) {
489 size_t p = *pstart_, pstart = p;
490 if (p >= vlen || v[p].stride != 1 || v[p].size < 2)
return 0;
494 while (p < vlen && v[p].stride == 1);
496 size_t n = p - pstart;
498 if (n < 2)
return false;
500 int start = (*disp = n == vlen ? 0 : v[pstart].
start);
502 int *restrict bl =
xmalloc(2 * n *
sizeof (*bl)),
503 *restrict d = bl + n;
505 d[0] = v[pstart].
start - start;
506 int bl0 = bl[0] = v[pstart].
size;
507 for (
size_t i = 1; i < n; i++) {
508 size_t iv = pstart + i;
509 d[i] = v[iv].
start - start;
511 hom_bl &= (bl[i] == bl0);
515 xt_mpi_call(MPI_Type_create_indexed_block((
int)n, bl0, d, old_type, dt),
518 xt_mpi_call(MPI_Type_indexed((
int)n, bl, d, old_type, dt), Xt_default_comm);
530 size_t vlen, MPI_Datatype old_type,
int *offset,
532 size_t ia = set_start;
534 if (ib <= ia || ib > vlen)
return 0;
537 for (
size_t i=ia; i < ib; i++)
543 if (ia == 0 && ib == vlen) {
553 int *restrict d =
xmalloc(
sizeof (*d) * (
size_t)n);
556 for (
size_t i=ia; i < ib; i++) {
557 size_t v_i_size = (size_t)(v[i].size > 0 ? v[i].size : 0);
558 for (
size_t k=0; k < v_i_size; k++) {
559 d[p] = v[i].
start + (int)k * v[i].stride - start;
564 if (n==1 && d[0] == 0) {
571 xt_mpi_call(MPI_Type_dup(old_type, dt), Xt_default_comm);
573 xt_mpi_call(MPI_Type_create_indexed_block(n, 1, d, old_type, dt), Xt_default_comm);
584 size_t set_start = 0, set_end = 0;
585 MPI_Datatype *restrict wdt =
xmalloc(
sizeof(*wdt) * (
size_t)vlen);
586 int *restrict wdisp =
xmalloc(
sizeof (*wdisp) * (
size_t)vlen);
595 size_t mm = m + (set_start < set_end);
602 if (set_start < set_end) {
613 if (set_start < set_end) {
618 MPI_Datatype result_dt;
620 assert(wdisp[0] == 0);
623 MPI_Aint old_type_lb, old_type_extent;
624 MPI_Aint *restrict wbdisp =
xmalloc((
size_t)wlen *
sizeof (*wbdisp));
625 int *restrict wblocklength
626 =
xmalloc((
size_t)wlen *
sizeof (*wblocklength));;
627 xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
628 &old_type_extent), Xt_default_comm);
629 for(
size_t i=0; i<wlen; i++) {
630 wbdisp[i] = wdisp[i] * old_type_extent;
633 xt_mpi_call(MPI_Type_create_struct((
int)wlen, wblocklength, wbdisp,
634 wdt, &result_dt), Xt_default_comm);
637 for (
size_t i = 0; i < wlen; i++)
638 xt_mpi_call(MPI_Type_free(wdt+i), Xt_default_comm);
640 xt_mpi_call(MPI_Type_commit(&result_dt), Xt_default_comm);
648 int count, MPI_Datatype old_type,
653 if (count < 1)
return MPI_DATATYPE_NULL;
660 int count, MPI_Datatype old_type) {
662 if (count < 1)
return MPI_DATATYPE_NULL;
664 for (
size_t i=0; i<(size_t)count; ++i) {
665 v[i].start = disp[i];
667 v[i].size = blocklengths[i];
669 MPI_Datatype dt =
parse_stripe(v, (
size_t)count, old_type);
676 MPI_Datatype old_type) {
678 if (disp_len < 1)
return MPI_DATATYPE_NULL;
681 size_t vlen =
scan_stripe(disp, (
size_t)disp_len, v);
706 void *attribute_val_out,
int *flag)
717 *(
void **)attribute_val_out = new_comm_attr;
719 retval = MPI_SUCCESS;
722 retval = MPI_ERR_NO_MEM;
730 void *attribute_val,
void *
XT_UNUSED(extra_state))
764 &attr_val, &attr_found),
766 return attr_found ? attr_val : NULL;
769 #if HAVE_DECL___BUILTIN_CTZL
770 #define ctzl(v) (__builtin_ctzl(v))
771 #elif HAVE_DECL___BUILTIN_CLZL
773 ctzl(
unsigned long v) {
775 ulong_bits =
sizeof (
unsigned long) * CHAR_BIT,
779 int c = ulong_bits - __builtin_clzl(v) - 1;
786 ulong_bits =
sizeof (
unsigned long) * CHAR_BIT,
789 unsigned int c = ulong_bits;
790 v &= (
unsigned long)-(
long)v;
792 #if SIZEOF_UNSIGNED_LONG * CHAR_BIT == 64
793 if (v & UINT64_C(0x00000000ffffffff)) c -= 32;
794 if (v & UINT64_C(0x0000ffff0000ffff)) c -= 16;
795 if (v & UINT64_C(0x00ff00ff00ff00ff)) c -= 8;
796 if (v & UINT64_C(0x0f0f0f0f0f0f0f0f)) c -= 4;
797 if (v & UINT64_C(0x3333333333333333)) c -= 2;
798 if (v & UINT64_C(0x5555555555555555)) c -= 1;
799 #elif SIZEOF_UNSIGNED_LONG * CHAR_BIT == 32
800 if (v & 0x0000FFFFUL) c -= 16;
801 if (v & 0x00FF00FFUL) c -= 8;
802 if (v & 0x0F0F0F0FUL) c -= 4;
803 if (v & 0x33333333UL) c -= 2;
804 if (v & 0x55555555UL) c -= 1;
806 error
"Unexpected size of long.\n"
831 =
xmalloc(
sizeof (*new_comm_xt_attr_val)
840 new_comm_xt_attr_val), comm_dest);
844 unset_lsb = ~used_map_entry & (used_map_entry + 1),
846 comm_xt_attr_val->
used_map[position] = used_map_entry | unset_lsb;
857 comm_attr), comm_dest);
893 MPI_Request *restrict req,
894 int *restrict ops_completed,
MPI_Comm comm)
897 size_t num_req_ = (size_t)*num_req;
898 xt_mpi_call(MPI_Testsome(*num_req, req, &done_count, ops_completed,
899 MPI_STATUSES_IGNORE), comm);
900 if (done_count != MPI_UNDEFINED) {
901 if (num_req_ > (
size_t)done_count) {
904 i < (size_t)done_count && j >= num_req_ - (
size_t)done_count;
906 if (ops_completed[i] < (
int)num_req_ - done_count) {
907 while (req[--j] == MPI_REQUEST_NULL);
908 req[ops_completed[i]] = req[j];
910 num_req_ -= (size_t)done_count;
915 *num_req = (int)num_req_;
916 return num_req_ == 0;
add versions of standard API functions not returning on error
base definitions header file
MPI_Datatype xt_mpi_generate_datatype(int const *displacements, int count, MPI_Datatype old_type, MPI_Comm comm)
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
unsigned long used_map_elem
void xt_mpi_error(int error_code, MPI_Comm comm)
static int xt_mpi_tag_ub_val
static struct xt_mpi_comm_internal_attr * xt_mpi_comm_get_internal_attr(MPI_Comm comm)
void xt_mpi_finalize(void)
static MPI_Datatype parse_stripe(const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type)
static MPI_Datatype xt_mpi_generate_compact_datatype_block(const int *disp, const int *blocklengths, int count, MPI_Datatype old_type)
static int xt_mpi_comm_internal_keyval_delete(MPI_Comm XT_UNUSED(comm), int XT_UNUSED(comm_keyval), void *attribute_val, void *XT_UNUSED(extra_state))
static size_t scan_stripe(const int *disp, size_t disp_len, struct Xt_offset_ext *restrict v)
static int gen_fallback_type(size_t set_start, size_t set_end, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *offset, MPI_Datatype *dt)
static int xt_mpi_comm_internal_keyval_copy(MPI_Comm XT_UNUSED(oldcomm), int XT_UNUSED(keyval), void *XT_UNUSED(extra_state), void *XT_UNUSED(attribute_val_in), void *attribute_val_out, int *flag)
static bool match_contiguous(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *restrict disp, MPI_Datatype *dt)
int xt_mpi_comm_internal_keyval
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
bool xt_mpi_test_some(int *restrict num_req, MPI_Request *restrict req, int *restrict ops_completed, MPI_Comm comm)
MPI_Datatype xt_mpi_generate_datatype_stripe(const struct Xt_offset_ext *v, int count, MPI_Datatype old_type, MPI_Comm XT_UNUSED(comm))
static int ctzl(unsigned long v)
static bool match_block_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
static int match_simple_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
static MPI_Datatype xt_mpi_generate_compact_datatype(int const *disp, int disp_len, MPI_Datatype old_type)
MPI_Datatype xt_mpi_generate_datatype_block(const int *displacements, const int *blocklengths, int count, MPI_Datatype old_type, MPI_Comm comm)
static bool match_indexed(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
void xt_mpi_comm_mark_exclusive(MPI_Comm comm)
#define xt_mpi_call(call, comm)