Yet Another eXchange Tool  0.9.0
xt_mpi.c
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jörg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49 
50 #include <assert.h>
51 #include <inttypes.h>
52 #include <limits.h>
53 #include <stdbool.h>
54 #include <stdlib.h>
55 #include <stdio.h>
56 
57 #include <mpi.h>
58 #include "core/core.h"
59 #include "core/ppm_xfuncs.h"
60 #include "xt/xt_core.h"
61 #include "xt/xt_mpi.h"
62 #include "xt_mpi_internal.h"
63 
69 #define COMPACT_DT
70 
71 static MPI_Datatype
72 xt_mpi_generate_compact_datatype_block(const int *disp, const int *blocklengths,
73  int count, MPI_Datatype old_type);
74 static MPI_Datatype
75 xt_mpi_generate_compact_datatype(int const *disp, int disp_len,
76  MPI_Datatype old_type);
77 
78 
//taken from http://beige.ucs.indiana.edu/I590/node85.html
/* Report an MPI error on stderr and terminate.
 *
 * Prints two lines, each prefixed with the calling rank: first the
 * generic error class, then the implementation-specific error string
 * for error_code.  Finally calls MPI_Abort on comm, so this function
 * does not return. */
void xt_mpi_error(int error_code, MPI_Comm comm) {
  int rank;
  MPI_Comm_rank(comm, &rank);

  char error_string[MPI_MAX_ERROR_STRING];
  int length_of_error_string, error_class;

  /* first the coarse error class ... */
  MPI_Error_class(error_code, &error_class);
  MPI_Error_string(error_class, error_string, &length_of_error_string);
  fprintf(stderr, "%3d: %s\n", rank, error_string);
  /* ... then the specific message for this error code */
  MPI_Error_string(error_code, error_string, &length_of_error_string);
  fprintf(stderr, "%3d: %s\n", rank, error_string);
  MPI_Abort(comm, error_code);
}
94 
95 #ifndef COMPACT_DT
96 static MPI_Datatype copy_mpi_datatype(MPI_Datatype old_type, MPI_Comm comm) {
97 
98  MPI_Datatype datatype;
99 
100  xt_mpi_call(MPI_Type_dup(old_type, &datatype), comm);
101 
102  return datatype;
103 }
104 
105 static MPI_Datatype
106 gen_mpi_datatype_simple(int displacement, MPI_Datatype old_type, MPI_Comm comm)
107 {
108  MPI_Datatype datatype;
109 
110  xt_mpi_call(MPI_Type_create_indexed_block(1, 1, &displacement, old_type,
111  &datatype), comm);
112  xt_mpi_call(MPI_Type_commit(&datatype), comm);
113 
114  return datatype;
115 }
116 
117 static MPI_Datatype
118 gen_mpi_datatype_contiguous(int displacement, int blocklength,
119  MPI_Datatype old_type, MPI_Comm comm) {
120 
121  MPI_Datatype datatype;
122 
123  if (displacement == 0)
124  xt_mpi_call(MPI_Type_contiguous(blocklength, old_type, &datatype),
125  comm);
126  else
127  xt_mpi_call(MPI_Type_create_indexed_block(1, blocklength,
128  &displacement, old_type,
129  &datatype), comm);
130 
131  xt_mpi_call(MPI_Type_commit(&datatype), comm);
132 
133  return datatype;
134 
135 }
136 
/* Committed datatype for `count` blocks of `blocklength` old_type
 * elements spaced `stride` elements apart, with the whole pattern
 * shifted by `offset` elements.  A non-zero offset is folded in by
 * wrapping the vector in a one-entry hindexed type. */
static MPI_Datatype
gen_mpi_datatype_vector(int count, int blocklength, int stride,
                        int offset, MPI_Datatype old_type, MPI_Comm comm) {

  MPI_Datatype datatype;

  xt_mpi_call(MPI_Type_vector(count, blocklength, stride, old_type,
                              &datatype), comm);
  if (offset != 0) {

    MPI_Datatype datatype_;
    int hindexed_blocklength = 1;
    /* note: despite the name, old_type_size receives the *extent*
     * (second output of MPI_Type_get_extent) */
    MPI_Aint old_type_size, old_type_lb;

    xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
                                    &old_type_size), comm);

    /* byte displacement = element offset times extent of old_type */
    MPI_Aint displacement = offset * old_type_size;

    xt_mpi_call(MPI_Type_create_hindexed(1, &hindexed_blocklength,
                                         &displacement, datatype, &datatype_),
                comm);
    /* the intermediate vector type is no longer needed */
    xt_mpi_call(MPI_Type_free(&datatype), comm);
    datatype = datatype_;
  }
  xt_mpi_call(MPI_Type_commit(&datatype), comm);

  return datatype;
}
166 
167 static MPI_Datatype
168 gen_mpi_datatype_indexed_block(int const * displacements, int blocklength,
169  int count, MPI_Datatype old_type, MPI_Comm comm)
170 {
171  MPI_Datatype datatype;
172 
173  xt_mpi_call(MPI_Type_create_indexed_block(count, blocklength,
174  (void *)displacements,
175  old_type, &datatype), comm);
176  xt_mpi_call(MPI_Type_commit(&datatype), comm);
177 
178  return datatype;
179 }
180 
181 static MPI_Datatype
182 gen_mpi_datatype_indexed(const int *displacements, const int *blocklengths,
183  int count, MPI_Datatype old_type, MPI_Comm comm) {
184 
185  MPI_Datatype datatype;
186 
187  xt_mpi_call(MPI_Type_indexed(count, (int*)blocklengths, (void*)displacements,
188  old_type, &datatype), comm);
189  xt_mpi_call(MPI_Type_commit(&datatype), comm);
190 
191  return datatype;
192 }
193 
/* Return 1 iff the (displacement, blocklength) pairs describe a
 * regular vector: all block lengths equal and consecutive
 * displacements separated by one constant stride.
 * Callers must pass count >= 2 (displacements[1] is read). */
static inline int
check_for_vector_type(const int *displacements, const int *blocklengths,
                      int count) {

  int bl0 = blocklengths[0];
  for (int i = 1; i < count; ++i)
    if (blocklengths[i] != bl0)
      return 0;

  int stride = displacements[1] - displacements[0];
  for (int i = 2; i < count; ++i)
    if (displacements[i] - displacements[i - 1] != stride)
      return 0;

  return 1;
}
212 
/* Return 1 iff every entry of blocklengths[0..count-1] equals the
 * first one, i.e. an indexed-block datatype suffices. */
static inline int check_for_indexed_block_type(const int *blocklengths,
                                               int count) {

  int bl0 = blocklengths[0];
  int i = count;
  while (--i > 0)
    if (blocklengths[i] != bl0)
      return 0;
  return 1;
}
224 #endif
225 
/* Build an MPI datatype from (displacement, blocklength) pairs, both
 * given in units of old_type extents.  With COMPACT_DT defined (the
 * default above) the stripe-based compact generator is used and comm
 * is ignored; otherwise the cheapest fitting plain MPI constructor
 * (dup / single block / vector / indexed_block / indexed) is selected.
 * Returns MPI_DATATYPE_NULL when count == 0. */
MPI_Datatype
xt_mpi_generate_datatype_block(const int *displacements,
                               const int *blocklengths,
                               int count, MPI_Datatype old_type,
                               MPI_Comm comm) {
  // ensure that yaxt is initialized
  assert(xt_initialized());

#ifdef COMPACT_DT
  (void)comm;
  return xt_mpi_generate_compact_datatype_block(displacements, blocklengths, count, old_type);
#else
  MPI_Datatype datatype;

  if (count == 0)
    datatype = MPI_DATATYPE_NULL;
  else if (count == 1 && blocklengths[0] == 1 && displacements[0] == 0)
    /* identity mapping: a plain duplicate suffices */
    datatype = copy_mpi_datatype(old_type, comm);
  else if (count == 1 && blocklengths[0] == 1)
    datatype = gen_mpi_datatype_simple(displacements[0], old_type, comm);
  else if (count == 1)
    datatype = gen_mpi_datatype_contiguous(displacements[0], blocklengths[0],
                                           old_type, comm);
  else if (check_for_vector_type(displacements, blocklengths, count))
    datatype = gen_mpi_datatype_vector(count, blocklengths[0],
                                       displacements[1] - displacements[0],
                                       displacements[0], old_type, comm);
  else if (check_for_indexed_block_type(blocklengths, count))
    datatype = gen_mpi_datatype_indexed_block(displacements, blocklengths[0],
                                              count, old_type, comm);
  else
    /* fully irregular pattern */
    datatype = gen_mpi_datatype_indexed(displacements, blocklengths, count,
                                        old_type, comm);

  return datatype;
#endif
}
263 
/* Build an MPI datatype selecting `count` single old_type elements at
 * the given element displacements.  Returns MPI_DATATYPE_NULL for
 * count <= 0.  In the non-compact path, consecutive displacements are
 * first coalesced into blocks before delegating to
 * xt_mpi_generate_datatype_block. */
MPI_Datatype xt_mpi_generate_datatype(int const * displacements, int count,
                                      MPI_Datatype old_type, MPI_Comm comm) {
  // ensure that yaxt is initialized
  assert(xt_initialized());

  if (count <= 0)
    return MPI_DATATYPE_NULL;

#ifdef COMPACT_DT
  (void)comm;
  return xt_mpi_generate_compact_datatype(displacements, count, old_type);
#else
  /* run-length encode: j counts how many displacements continue
   * consecutively (+1 steps) from position i */
  int * blocklengths = xmalloc((size_t)count * sizeof(*blocklengths));
  int new_count = 0;
  {
    int i = 0;
    do {
      int j = 1;
      while (i + j < count && displacements[i] + j == displacements[i + j])
        ++j;
      blocklengths[new_count++] = j;
      i += j;
    } while (i < count);
  }

  int * tmp_displ = NULL;
  const int *displ;

  if (new_count != count) {
    /* compaction happened: gather the start displacement of each block */
    tmp_displ = xmalloc((size_t)new_count * sizeof(*tmp_displ));

    int offset = 0;

    for (int i = 0; i < new_count; ++i) {

      tmp_displ[i] = displacements[offset];
      offset += blocklengths[i];
    }

    displ = tmp_displ;
  } else
    displ = displacements;

  MPI_Datatype datatype;

  datatype = xt_mpi_generate_datatype_block(displ, blocklengths, new_count,
                                            old_type, comm);

  free(blocklengths);

  /* free(NULL) is a no-op when no compaction happened */
  free(tmp_displ);

  return datatype;
#endif
}
320 
321 
/* Compress a list of element displacements into stride runs
 * (struct Xt_offset_ext: start, size, stride).  Writes at most
 * disp_len extents into v and returns the number written.
 * Two-element runs are only kept as a run when contiguous
 * (stride == 1); a non-contiguous pair is split so its second element
 * can seed the following run. */
static size_t
scan_stripe(const int *disp, size_t disp_len, struct Xt_offset_ext *restrict v)
{
  if (disp_len<1) return 0;

  /* x is the run currently being grown; i counts extents emitted */
  struct Xt_offset_ext x = (struct Xt_offset_ext){ disp[0], 1, 1 };
  size_t i = 0;
  for (size_t p = 1; p < disp_len; ++p) {
    int new_stride = disp[p] - disp[p-1];
    if (x.size == 1) {
      /* second element of a run fixes the candidate stride */
      x.stride = new_stride;
      x.size = 2;
    } else if (new_stride == x.stride) {
      // x.size >= 2:
      x.size++;
    } else if (x.size > 2 || (x.size == 2 && x.stride == 1) ) {
      // we accept small contiguous vectors (nstrides==2, stride==1)
      v[i]= x;
      i++;
      x = (struct Xt_offset_ext){ disp[p], 1, 1 };
    } else { // x.size == 2, next offset doesn't match current stride
      // break up trivial vec:
      v[i].start = x.start;
      v[i].size = 1;
      v[i].stride = 1;
      i++;
      /* keep the second element as the head of a fresh 2-run */
      x.start += x.stride;
      x.size = 2;
      x.stride = new_stride;
    }
  }
  // tail cases:
  if (x.size > 2 || (x.size == 2 && x.stride == 1)) {
    v[i] = x;
    i++;
  } else if (x.size == 2) {
    /* non-contiguous pair at the end: emit as two singletons */
    v[i].start = x.start;
    v[i].size = 1;
    v[i].stride = 1;
    i++;
    v[i].start = x.start + x.stride;
    v[i].size = 1;
    v[i].stride = 1;
    i++;
  } else { // x.size == 1
    v[i].start = x.start;
    v[i].size = 1;
    v[i].stride = 1;
    i++;
  }

  return i;
}
375 
/* Try to match the single extent at *pstart_ as a strided vector
 * (size >= 2, stride != 1) of old_type.  On success advances *pstart_
 * by one, stores the extent's element displacement in *disp (0 when
 * this extent is the whole input, vlen == 1), creates the datatype in
 * *dt and returns the number of strides; returns 0 on no match. */
static int
match_simple_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen,
                 MPI_Datatype old_type, int *disp, MPI_Datatype *dt) {
  // we only accept non-trivial matches (nsteps>2) with stride /= 1
  // using only one vector from v
  size_t p = *pstart_;
  if (p >= vlen) return 0;
  int nstrides = v[p].size;
  int stride = v[p].stride;
  if (nstrides < 2 || stride == 1 ) return 0;

  *pstart_ = p + 1;

  /* when embedded among other extents, report the absolute start via
   * *disp; a lone extent is made absolute inside the type instead */
  *disp = vlen > 1 ? v[p].start : 0;

  MPI_Datatype dt1;
  xt_mpi_call(MPI_Type_vector(nstrides, 1, stride, old_type, &dt1),
              Xt_default_comm);

  int start = v[p].start - *disp;
  if (!start) {
    *dt = dt1;
  } else {
    // (start != 0) => add offset:
    MPI_Aint old_type_size, old_type_lb;
    xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
                                    &old_type_size), Xt_default_comm);

    MPI_Aint displacement = start * old_type_size;
    int bl2 = 1;
    MPI_Datatype dt2;
    xt_mpi_call(MPI_Type_create_hindexed(1, &bl2, &displacement, dt1, &dt2),
                Xt_default_comm);

    xt_mpi_call(MPI_Type_free(&dt1), Xt_default_comm);

    *dt = dt2;
  }
  return nstrides;
}
416 
/* Try to match a run of at least 3 equal-sized contiguous extents
 * (stride 1) whose starts are themselves equally spaced, i.e. a
 * blocked vector.  On success advances *pstart_ past the run, stores
 * the run's element displacement in *disp (0 when the run covers all
 * of v), creates the datatype in *dt and returns true. */
static bool
match_block_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen,
                MPI_Datatype old_type, int *disp, MPI_Datatype *dt) {
  // using at least 3 vectors
  size_t p = *pstart_, pstart = p;
  if (p+2 >= vlen || v[p].stride != 1 || v[p+1].stride != 1 ) return 0;
  int bl = v[p].size;
  if (bl < 1 || v[p+1].size != bl) return 0;

  /* spacing between consecutive block starts */
  int vstride = v[p+1].start - v[p].start;

  p += 2;
  while( p < vlen && v[p].stride == 1 && v[p].size == bl &&
         v[p].start - v[p-1].start == vstride ) {
    p++;
  }
  size_t n = p - pstart;
  if (n<3) return false;

  *disp = n == vlen ? 0 : v[pstart].start;

  MPI_Datatype dt1;
  xt_mpi_call(MPI_Type_vector((int)n, bl, vstride, old_type, &dt1),
              Xt_default_comm);

  int start = v[pstart].start - *disp;

  *pstart_ = p;
  if (!start) {
    *dt = dt1;
  } else {
    // (start != 0) => add offset:
    MPI_Aint old_type_size, old_type_lb;
    xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
                                    &old_type_size), Xt_default_comm);

    MPI_Aint displacement = start * old_type_size;
    int bl2 = 1;
    MPI_Datatype dt2;
    xt_mpi_call(MPI_Type_create_hindexed(1, &bl2, &displacement, dt1, &dt2),
                Xt_default_comm);
    xt_mpi_call(MPI_Type_free(&dt1), Xt_default_comm);
    *dt = dt2;
  }
  /* n >= 3 here, so this is always true */
  return n != 0;
}
466 
/* Try to match the single extent at *pstart_ as a contiguous block
 * (stride 1, size >= 2).  On success advances *pstart_ by one, stores
 * the element displacement in *disp (0 when vlen == 1; the offset is
 * then folded into the type itself), creates *dt and returns true. */
static bool
match_contiguous(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen,
                 MPI_Datatype old_type, int *restrict disp, MPI_Datatype *dt) {
  size_t p = *pstart_;
  if (p >= vlen || v[p].stride != 1 || v[p].size < 2) return 0;

  /* d is the residual offset not reported through *disp */
  int d = v[p].start - (*disp = vlen > 1 ? v[p].start : 0);

  if (!d)
    xt_mpi_call(MPI_Type_contiguous(v[p].size, old_type, dt), Xt_default_comm) ;
  else
    xt_mpi_call(MPI_Type_create_indexed_block(1, v[p].size, &d, old_type, dt),
                Xt_default_comm);

  *pstart_ = p+1;
  /* v[p].size >= 2 here, so this is always true */
  return v[p].size != 0;
}
484 
/* Try to match a maximal run of at least 2 contiguous extents
 * (stride 1) starting at *pstart_ as an indexed datatype; an
 * indexed_block type is used when all block sizes agree.  On success
 * advances *pstart_ past the run, stores the run's element
 * displacement in *disp (0 when the run covers all of v), creates *dt
 * and returns true. */
static bool
match_indexed(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen,
              MPI_Datatype old_type, int *disp, MPI_Datatype *dt) {
  // we only accept non-trivial matches
  size_t p = *pstart_, pstart = p;
  if (p >= vlen || v[p].stride != 1 || v[p].size < 2) return 0;

  do
    ++p;
  while (p < vlen && v[p].stride == 1);

  size_t n = p - pstart;

  if (n < 2) return false;

  int start = (*disp = n == vlen ? 0 : v[pstart].start);

  /* single allocation: first n ints are blocklengths, last n are
   * displacements relative to start */
  int *restrict bl = xmalloc(2 * n * sizeof (*bl)),
    *restrict d = bl + n;
  bool hom_bl = true;
  d[0] = v[pstart].start - start;
  int bl0 = bl[0] = v[pstart].size;
  for (size_t i = 1; i < n; i++) {
    size_t iv = pstart + i;
    d[i] = v[iv].start - start;
    bl[i] = v[iv].size;
    hom_bl &= (bl[i] == bl0);
  }

  if (hom_bl) {
    xt_mpi_call(MPI_Type_create_indexed_block((int)n, bl0, d, old_type, dt),
                Xt_default_comm);
  } else {
    xt_mpi_call(MPI_Type_indexed((int)n, bl, d, old_type, dt), Xt_default_comm);
  }

  *pstart_ = p;

  free(bl);
  /* n >= 2 here, so this is always true */
  return n != 0;
}
526 
/* Build a datatype for the extents v[set_start..set_end) that the
 * pattern matchers could not handle, by exploding them into single
 * element displacements and using an indexed_block type (or a plain
 * dup for the trivial single-element-at-0 case).
 * The displacement base is written to *offset: 0 when the range covers
 * all of v (absolute type), else the first extent's start (relative
 * type embedded by the caller).  Returns the element count, 0 when the
 * range is empty or invalid. */
static int
gen_fallback_type(size_t set_start, size_t set_end,
                  const struct Xt_offset_ext *v,
                  size_t vlen, MPI_Datatype old_type, int *offset,
                  MPI_Datatype *dt) {
  size_t ia = set_start;
  size_t ib = set_end;
  if (ib <= ia || ib > vlen) return 0;

  /* total number of single elements covered by the range */
  int n = 0;
  for (size_t i=ia; i < ib; i++)
    n += v[i].size;

  if (n<1) return 0;

  int start;
  if (ia == 0 && ib == vlen) {
    // generate absolute datatype
    start = 0;
  } else {
    // generate relative datatype that gets embedded by the caller
    start = v[ia].start;
  }

  *offset = start;

  int *restrict d = xmalloc(sizeof (*d) * (size_t)n);
  size_t p=0;

  /* explode every extent into per-element displacements */
  for (size_t i=ia; i < ib; i++) {
    size_t v_i_size = (size_t)(v[i].size > 0 ? v[i].size : 0);
    for (size_t k=0; k < v_i_size; k++) {
      d[p] = v[i].start + (int)k * v[i].stride - start;
      p++;
    }
  }

  if (n==1 && d[0] == 0) {
    // At the moment we disable the embed aspect that was used to avoid MPI_Type_dup.
    // It turned out this creates more complexity when freeing the intermediate datatypes in the callin function.
    // maybe we just forget the whole idea - then we can also remove the embed logic here.
    //if (embed)
    //  *dt = old_type;
    //else
    xt_mpi_call(MPI_Type_dup(old_type, dt), Xt_default_comm);
  } else {
    xt_mpi_call(MPI_Type_create_indexed_block(n, 1, d, old_type, dt), Xt_default_comm);
  }
  free(d);

  return n;
}
579 
580 static MPI_Datatype
581 parse_stripe(const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type) {
582  /* [set_start,set_end) describes the prefix of non-matching
583  * elements in v that then need to be handled with gen_fallback_type */
584  size_t set_start = 0, set_end = 0;
585  MPI_Datatype *restrict wdt = xmalloc(sizeof(*wdt) * (size_t)vlen);
586  int *restrict wdisp = xmalloc(sizeof (*wdisp) * (size_t)vlen);
587  /* [p,vlen) is the part of v that still needs matching performed */
588  /* m is the index of the next datatype and displacements to write
589  * to wdt and wdisp respectively */
590  size_t p = 0, m = 0;
591  while (p<vlen) {
592  /* depending on wether there is a non-empty prefix, the datatype
593  * and displacement corresponding to a match need to be written
594  * to wdt[m+1] and wdisp[m+1] or wdt[m] and wdisp[m] respectively */
595  size_t mm = m + (set_start < set_end);
596  if ( match_block_vec(&p, v, vlen, old_type, wdisp+mm, wdt+mm) ||
597  match_indexed(&p, v, vlen, old_type, wdisp+mm, wdt+mm) ||
598  match_simple_vec(&p, v, vlen, old_type, wdisp+mm, wdt+mm) ||
599  match_contiguous(&p, v, vlen, old_type, wdisp+mm, wdt+mm) ) {
600  /* in case a match is found generate fallback datatype for
601  * non-matching, preceding extents */
602  if (set_start < set_end) {
603  gen_fallback_type(set_start, set_end, v, vlen, old_type, wdisp+m, wdt+m);
604  m++;
605  }
606  m++;
607  set_start = p;
608  } else {
609  /* assign ext investigated last to prefix */
610  set_end = ++p;
611  }
612  }
613  if (set_start < set_end) {
614  gen_fallback_type(set_start, set_end, v, vlen, old_type, wdisp+m, wdt+m);
615  m++;
616  }
617  size_t wlen = m;
618  MPI_Datatype result_dt;
619  if (wlen == 1 ) {
620  assert(wdisp[0] == 0);
621  result_dt = wdt[0];
622  } else {
623  MPI_Aint old_type_lb, old_type_extent;
624  MPI_Aint *restrict wbdisp = xmalloc((size_t)wlen * sizeof (*wbdisp));
625  int *restrict wblocklength
626  = xmalloc((size_t)wlen * sizeof (*wblocklength));;
627  xt_mpi_call(MPI_Type_get_extent(old_type, &old_type_lb,
628  &old_type_extent), Xt_default_comm);
629  for(size_t i=0; i<wlen; i++) {
630  wbdisp[i] = wdisp[i] * old_type_extent;
631  wblocklength[i] = 1;
632  }
633  xt_mpi_call(MPI_Type_create_struct((int)wlen, wblocklength, wbdisp,
634  wdt, &result_dt), Xt_default_comm);
635  free(wblocklength);
636  free(wbdisp);
637  for (size_t i = 0; i < wlen; i++)
638  xt_mpi_call(MPI_Type_free(wdt+i), Xt_default_comm);
639  }
640  xt_mpi_call(MPI_Type_commit(&result_dt), Xt_default_comm);
641  free(wdt);
642  free(wdisp);
643  return result_dt;
644 }
645 
646 MPI_Datatype
648  int count, MPI_Datatype old_type,
649  MPI_Comm XT_UNUSED(comm))
650 {
651  // ensure that yaxt is initialized
652  assert(xt_initialized());
653  if (count < 1) return MPI_DATATYPE_NULL;
654  return parse_stripe(v, (size_t)count, old_type);
655 }
656 
657 
658 static MPI_Datatype
659 xt_mpi_generate_compact_datatype_block(const int *disp, const int *blocklengths,
660  int count, MPI_Datatype old_type) {
661 
662  if (count < 1) return MPI_DATATYPE_NULL;
663  struct Xt_offset_ext *restrict v = xmalloc(sizeof(*v) * (size_t)count);
664  for (size_t i=0; i<(size_t)count; ++i) {
665  v[i].start = disp[i];
666  v[i].stride = 1;
667  v[i].size = blocklengths[i];
668  }
669  MPI_Datatype dt = parse_stripe(v, (size_t)count, old_type);
670  free(v);
671  return dt;
672 }
673 
/* Build a compact datatype from a bare displacement list: first
 * compress the displacements into stride runs (scan_stripe), then map
 * the runs onto MPI type constructors (parse_stripe).
 * Returns MPI_DATATYPE_NULL for disp_len < 1. */
static MPI_Datatype
xt_mpi_generate_compact_datatype(const int *disp, int disp_len,
                                 MPI_Datatype old_type) {

  if (disp_len < 1) return MPI_DATATYPE_NULL;

  /* scan_stripe writes at most disp_len extents */
  struct Xt_offset_ext *v = xmalloc(sizeof(*v) * (size_t)disp_len);
  size_t vlen = scan_stripe(disp, (size_t)disp_len, v);
  MPI_Datatype dt = parse_stripe(v, vlen, old_type);
  free(v);
  return dt;
}
686 
687 /* functions to handle optimizations on communicators */
/* MPI attribute key under which the tag-range book-keeping below is
 * cached on communicators handled by xt_mpi_comm_smart_dup.
 * NOTE(review): this definition and parts of the struct below were
 * lost in extraction and have been restored from the file's own symbol
 * index; verify the initializer against upstream sources. */
int xt_mpi_comm_internal_keyval;

/* one element of the used-tag-range bitmap */
typedef unsigned long used_map_elem;

enum {
  used_map_elem_bits = sizeof (used_map_elem) * CHAR_BIT,
};

/* Per-communicator attribute value: reference count of smart_dup
 * users plus a growable bitmap of tag-range slots already handed out. */
struct xt_mpi_comm_internal_attr {
  int refcount;             /* number of active smart_dup references */
  unsigned used_map_size;   /* number of elements in used_map */
  used_map_elem used_map[]; /* flexible-array bitmap of used tag ranges */
};
701 
702 static int
704  MPI_Comm XT_UNUSED(oldcomm), int XT_UNUSED(keyval),
705  void *XT_UNUSED(extra_state), void *XT_UNUSED(attribute_val_in),
706  void *attribute_val_out, int *flag)
707 {
708  struct xt_mpi_comm_internal_attr *new_comm_attr
709  = malloc(sizeof (struct xt_mpi_comm_internal_attr)
710  + sizeof (used_map_elem));
711  int retval;
712  if (new_comm_attr)
713  {
714  new_comm_attr->refcount = 1;
715  new_comm_attr->used_map_size = 1;
716  new_comm_attr->used_map[0] = 1U;
717  *(void **)attribute_val_out = new_comm_attr;
718  *flag = 1;
719  retval = MPI_SUCCESS;
720  } else {
721  *flag = 0;
722  retval = MPI_ERR_NO_MEM;
723  }
724  return retval;
725 }
726 
727 static int
729  MPI_Comm XT_UNUSED(comm), int XT_UNUSED(comm_keyval),
730  void *attribute_val, void *XT_UNUSED(extra_state))
731 {
732  free(attribute_val);
733  return MPI_SUCCESS;
734 }
735 
736 static int xt_mpi_tag_ub_val;
737 
738 void
739 xt_mpi_init(void) {
740  xt_mpi_call(MPI_Comm_create_keyval(xt_mpi_comm_internal_keyval_copy,
743  Xt_default_comm);
744  void *attr;
745  int flag;
746  xt_mpi_call(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &attr, &flag),
748  assert(flag);
749  xt_mpi_tag_ub_val = *(int *)attr;
750 }
751 
752 void
754  xt_mpi_call(MPI_Comm_free_keyval(&xt_mpi_comm_internal_keyval),
755  Xt_default_comm);
756 }
757 
758 static struct xt_mpi_comm_internal_attr *
760 {
761  int attr_found;
762  void *attr_val;
763  xt_mpi_call(MPI_Comm_get_attr(comm, xt_mpi_comm_internal_keyval,
764  &attr_val, &attr_found),
765  comm);
766  return attr_found ? attr_val : NULL;
767 }
768 
769 #if HAVE_DECL___BUILTIN_CTZL
770 #define ctzl(v) (__builtin_ctzl(v))
771 #elif HAVE_DECL___BUILTIN_CLZL
772 static inline int
773 ctzl(unsigned long v) {
774  enum {
775  ulong_bits = sizeof (unsigned long) * CHAR_BIT,
776  };
777  /* clear all but lowest 1 bit */
778  v = v & ~(v - 1);
779  int c = ulong_bits - __builtin_clzl(v) - 1;
780  return c;
781 }
782 #else
783 static inline int
784 ctzl(unsigned long v) {
785  enum {
786  ulong_bits = sizeof (unsigned long) * CHAR_BIT,
787  };
788  // c will be the number of zero bits on the right
789  unsigned int c = ulong_bits;
790  v &= (unsigned long)-(long)v;
791  if (v) c--;
792 #if SIZEOF_UNSIGNED_LONG * CHAR_BIT == 64
793  if (v & UINT64_C(0x00000000ffffffff)) c -= 32;
794  if (v & UINT64_C(0x0000ffff0000ffff)) c -= 16;
795  if (v & UINT64_C(0x00ff00ff00ff00ff)) c -= 8;
796  if (v & UINT64_C(0x0f0f0f0f0f0f0f0f)) c -= 4;
797  if (v & UINT64_C(0x3333333333333333)) c -= 2;
798  if (v & UINT64_C(0x5555555555555555)) c -= 1;
799 #elif SIZEOF_UNSIGNED_LONG * CHAR_BIT == 32
800  if (v & 0x0000FFFFUL) c -= 16;
801  if (v & 0x00FF00FFUL) c -= 8;
802  if (v & 0x0F0F0F0FUL) c -= 4;
803  if (v & 0x33333333UL) c -= 2;
804  if (v & 0x55555555UL) c -= 1;
805 #else
806  error "Unexpected size of long.\n"
807 #endif
808  return (int)c;
809 }
810 #endif
811 
812 MPI_Comm
813 xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
814 {
815  MPI_Comm comm_dest;
816  struct xt_mpi_comm_internal_attr *comm_xt_attr_val
818  size_t position = 0;
819  int refcount = comm_xt_attr_val ? comm_xt_attr_val->refcount : 0;
820  if (comm_xt_attr_val
822  comm_dest = comm;
823  comm_xt_attr_val->refcount = ++refcount;
824  size_t used_map_size = comm_xt_attr_val->used_map_size;
825  while (position < used_map_size
826  && comm_xt_attr_val->used_map[position] == ~(used_map_elem)0)
827  ++position;
828  if (position >= used_map_size) {
829  /* sadly, we need to recreate the value to enlarge it */
830  struct xt_mpi_comm_internal_attr *new_comm_xt_attr_val
831  = xmalloc(sizeof (*new_comm_xt_attr_val)
832  + (used_map_size + 1) * sizeof (used_map_elem));
833  new_comm_xt_attr_val->refcount = refcount;
834  new_comm_xt_attr_val->used_map_size = (unsigned)(used_map_size + 1);
835  for (size_t i = 0; i < used_map_size; ++i)
836  new_comm_xt_attr_val->used_map[i] = comm_xt_attr_val->used_map[i];
837  new_comm_xt_attr_val->used_map[used_map_size] = 1U;
838  position *= used_map_elem_bits;
839  xt_mpi_call(MPI_Comm_set_attr(comm_dest, xt_mpi_comm_internal_keyval,
840  new_comm_xt_attr_val), comm_dest);
841  } else {
842  /* not all bits are set, find first unset position and insert */
843  used_map_elem used_map_entry = comm_xt_attr_val->used_map[position],
844  unset_lsb = ~used_map_entry & (used_map_entry + 1),
845  bit_pos = (used_map_elem)ctzl(unset_lsb);
846  comm_xt_attr_val->used_map[position] = used_map_entry | unset_lsb;
847  position = position * used_map_elem_bits + (size_t)bit_pos;
848  }
849  } else {
850  struct xt_mpi_comm_internal_attr *comm_attr
851  = xmalloc(sizeof (*comm_attr) + sizeof (used_map_elem));
852  comm_attr->refcount = 1;
853  comm_attr->used_map_size = 1;
854  comm_attr->used_map[0] = 1U;
855  xt_mpi_call(MPI_Comm_dup(comm, &comm_dest), comm);
856  xt_mpi_call(MPI_Comm_set_attr(comm_dest, xt_mpi_comm_internal_keyval,
857  comm_attr), comm_dest);
858  }
859  *tag_offset = (int)(position * xt_mpi_num_tags);
860  return comm_dest;
861 }
862 
863 void
864 xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
865 {
866  struct xt_mpi_comm_internal_attr *comm_xt_attr_val
868  int refcount = comm_xt_attr_val ? --(comm_xt_attr_val->refcount) : 0;
869  if (refcount < 1) {
870  xt_mpi_call(MPI_Comm_free(comm), MPI_COMM_WORLD);
871  *comm = MPI_COMM_NULL;
872  } else {
873  size_t position = (size_t)tag_offset / xt_mpi_num_tags,
874  map_elem = position / used_map_elem_bits,
875  in_elem_bit = position % used_map_elem_bits;
876  comm_xt_attr_val->used_map[map_elem] &= ~((used_map_elem)1 << in_elem_bit);
877  }
878 }
879 
880 void
882  struct xt_mpi_comm_internal_attr *comm_attr
883  = xmalloc(sizeof (*comm_attr) + sizeof (used_map_elem));
884  comm_attr->refcount = 1;
885  comm_attr->used_map_size = 1;
886  comm_attr->used_map[0] = 1U;
887  xt_mpi_call(MPI_Comm_set_attr(comm, xt_mpi_comm_internal_keyval,
888  comm_attr), comm);
889 }
890 
/* Poll the first *num_req requests with MPI_Testsome and compact the
 * request array in place: slots of completed requests (indices
 * reported in ops_completed) are refilled with still-pending requests
 * taken from the tail of the array.  *num_req is updated to the number
 * of requests still pending.  Returns true when all requests have
 * completed. */
bool
xt_mpi_test_some(int *restrict num_req,
                 MPI_Request *restrict req,
                 int *restrict ops_completed, MPI_Comm comm)
{
  int done_count;
  size_t num_req_ = (size_t)*num_req;
  xt_mpi_call(MPI_Testsome(*num_req, req, &done_count, ops_completed,
                           MPI_STATUSES_IGNORE), comm);
  /* MPI_UNDEFINED means no active requests were passed in */
  if (done_count != MPI_UNDEFINED) {
    if (num_req_ > (size_t)done_count) {
      /* move pending requests from the tail into completed slots that
       * lie within the new, shorter prefix */
      size_t j = num_req_;
      for (size_t i = 0;
           i < (size_t)done_count && j >= num_req_ - (size_t)done_count;
           ++i)
        if (ops_completed[i] < (int)num_req_ - done_count) {
          /* skip tail entries already nulled by MPI_Testsome */
          while (req[--j] == MPI_REQUEST_NULL);
          req[ops_completed[i]] = req[j];
        }
      num_req_ -= (size_t)done_count;
    }
    else
      num_req_ = 0;
  }
  *num_req = (int)num_req_;
  return num_req_ == 0;
}
918 
919 
920 /*
921  * Local Variables:
922  * c-basic-offset: 2
923  * coding: utf-8
924  * indent-tabs-mode: nil
925  * show-trailing-whitespace: t
926  * require-trailing-newline: t
927  * End:
928  */
@ MPI_COMM_WORLD
Definition: core.h:73
@ MPI_COMM_NULL
Definition: core.h:74
int MPI_Comm
Definition: core.h:64
#define XT_UNUSED(x)
Definition: core.h:84
add versions of standard API functions not returning on error
#define xmalloc(size)
Definition: ppm_xfuncs.h:70
used_map_elem used_map[]
Definition: xt_mpi.c:699
base definitions header file
int xt_initialized(void)
Definition: xt_init.c:107
MPI_Datatype xt_mpi_generate_datatype(int const *displacements, int count, MPI_Datatype old_type, MPI_Comm comm)
Definition: xt_mpi.c:264
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
Definition: xt_mpi.c:813
unsigned long used_map_elem
Definition: xt_mpi.c:690
void xt_mpi_error(int error_code, MPI_Comm comm)
Definition: xt_mpi.c:80
static int xt_mpi_tag_ub_val
Definition: xt_mpi.c:736
void xt_mpi_init(void)
Definition: xt_mpi.c:739
static struct xt_mpi_comm_internal_attr * xt_mpi_comm_get_internal_attr(MPI_Comm comm)
Definition: xt_mpi.c:759
void xt_mpi_finalize(void)
Definition: xt_mpi.c:753
static MPI_Datatype parse_stripe(const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type)
Definition: xt_mpi.c:581
static MPI_Datatype xt_mpi_generate_compact_datatype_block(const int *disp, const int *blocklengths, int count, MPI_Datatype old_type)
Definition: xt_mpi.c:659
static int xt_mpi_comm_internal_keyval_delete(MPI_Comm XT_UNUSED(comm), int XT_UNUSED(comm_keyval), void *attribute_val, void *XT_UNUSED(extra_state))
Definition: xt_mpi.c:728
static size_t scan_stripe(const int *disp, size_t disp_len, struct Xt_offset_ext *restrict v)
Definition: xt_mpi.c:323
static int gen_fallback_type(size_t set_start, size_t set_end, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *offset, MPI_Datatype *dt)
Definition: xt_mpi.c:528
static int xt_mpi_comm_internal_keyval_copy(MPI_Comm XT_UNUSED(oldcomm), int XT_UNUSED(keyval), void *XT_UNUSED(extra_state), void *XT_UNUSED(attribute_val_in), void *attribute_val_out, int *flag)
Definition: xt_mpi.c:703
@ used_map_elem_bits
Definition: xt_mpi.c:693
static bool match_contiguous(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *restrict disp, MPI_Datatype *dt)
Definition: xt_mpi.c:468
int xt_mpi_comm_internal_keyval
Definition: xt_mpi.c:688
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
Definition: xt_mpi.c:864
bool xt_mpi_test_some(int *restrict num_req, MPI_Request *restrict req, int *restrict ops_completed, MPI_Comm comm)
Definition: xt_mpi.c:892
MPI_Datatype xt_mpi_generate_datatype_stripe(const struct Xt_offset_ext *v, int count, MPI_Datatype old_type, MPI_Comm XT_UNUSED(comm))
Definition: xt_mpi.c:647
static int ctzl(unsigned long v)
Definition: xt_mpi.c:784
static bool match_block_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
Definition: xt_mpi.c:421
static int match_simple_vec(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
Definition: xt_mpi.c:377
static MPI_Datatype xt_mpi_generate_compact_datatype(int const *disp, int disp_len, MPI_Datatype old_type)
Definition: xt_mpi.c:675
MPI_Datatype xt_mpi_generate_datatype_block(const int *displacements, const int *blocklengths, int count, MPI_Datatype old_type, MPI_Comm comm)
Definition: xt_mpi.c:227
static bool match_indexed(size_t *pstart_, const struct Xt_offset_ext *v, size_t vlen, MPI_Datatype old_type, int *disp, MPI_Datatype *dt)
Definition: xt_mpi.c:486
void xt_mpi_comm_mark_exclusive(MPI_Comm comm)
Definition: xt_mpi.c:881
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition: xt_mpi.h:68
@ xt_mpi_num_tags