Yet Another eXchange Tool  0.9.0
xt_idxsection.c
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jörg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifdef HAVE_CONFIG_H
47 #include <config.h>
48 #endif
49 
50 #include <assert.h>
51 #include <limits.h>
52 #include <stdlib.h>
53 #include <stdio.h>
54 #include <string.h>
55 
56 #include "xt_arithmetic_util.h"
57 #include "xt/xt_idxlist.h"
58 #include "xt_idxlist_internal.h"
59 #include "xt/xt_idxempty.h"
60 #include "xt/xt_idxvec.h"
61 #include "xt/xt_idxsection.h"
62 #include "xt_idxsection_internal.h"
63 #include "xt/xt_mpi.h"
64 #include "xt/mergesort.h"
65 #include "xt/quicksort.h"
66 #include "xt_idxlist_unpack.h"
67 #include "core/ppm_xfuncs.h"
68 #include "core/core.h"
69 #include "instr.h"
70 
71 static void
73 
74 static size_t
76 
77 static void
78 idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
79  int *position, MPI_Comm comm);
80 
81 static Xt_idxlist
83 
84 static void
85 idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices);
86 
87 static const Xt_int *
89 
90 static void
91 idxsection_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe ** stripes,
92  int * num_stripes);
93 
94 static int
95 idxsection_get_index_at_position(Xt_idxlist idxlist, int position,
96  Xt_int * index);
97 
98 static int
100  int * position);
101 
102 static int
104  int * position, int offset);
105 static int
106 idxsection_get_positions_of_indices(Xt_idxlist body_idxlist, Xt_int const * selection_idx,
107  int num_selection, int *positions,
108  int single_match_only);
109 
110 static Xt_int
112 
113 static Xt_int
115 
116 static const struct xt_idxlist_vtable idxsection_vtable = {
118  .get_pack_size = idxsection_get_pack_size,
119  .pack = idxsection_pack,
120  .copy = idxsection_copy,
121  .get_indices = idxsection_get_indices,
122  .get_indices_const = idxsection_get_indices_const,
123  .get_index_stripes = idxsection_get_index_stripes,
124  .get_index_at_position = idxsection_get_index_at_position,
125  .get_indices_at_positions = NULL,
126  .get_position_of_index = idxsection_get_position_of_index,
127  .get_positions_of_indices = idxsection_get_positions_of_indices,
128  .get_position_of_index_off = idxsection_get_position_of_index_off,
129  .get_positions_of_indices_off = NULL,
130  .get_min_index = idxsection_get_min_index,
131  .get_max_index = idxsection_get_max_index,
132  .get_bounding_box = NULL,
133  .idxlist_pack_code = SECTION,
134 };
135 
136 /* descriptor for per-dimension extent and stride */
137 struct dim_desc
138 {
141 };
142 
143 static MPI_Datatype dim_desc_dt;
144 
145 
147 
149 
150  struct Xt_idxlist_ parent;
151 
153 
156 
159  int ndim;
160  struct dim_desc dims[];
161 };
162 
163 static int
165 
166 void
168 {
169  struct dim_desc dim_desc;
170 
171  MPI_Aint base_address, local_size_address;
172 
173  MPI_Get_address(&dim_desc, &base_address);
174  MPI_Get_address(&dim_desc.local_size, &local_size_address);
175 
176  enum { num_dt_components = 2 };
177  int block_lengths[num_dt_components] = { 4, 1 };
178  MPI_Aint displacements[num_dt_components]
179  = {0, local_size_address - base_address };
180  MPI_Datatype types[num_dt_components]
181  = { Xt_int_dt, MPI_INT },
182  dim_desc_dt_unaligned;
183  xt_mpi_call(MPI_Type_create_struct(num_dt_components,
184  block_lengths, displacements, types,
185  &dim_desc_dt_unaligned), Xt_default_comm);
186  xt_mpi_call(MPI_Type_create_resized(dim_desc_dt_unaligned, 0,
187  (MPI_Aint)sizeof(dim_desc),
188  &dim_desc_dt), Xt_default_comm);
189  xt_mpi_call(MPI_Type_free(&dim_desc_dt_unaligned), Xt_default_comm);
190  xt_mpi_call(MPI_Type_commit(&dim_desc_dt), Xt_default_comm);
191 }
192 
193 void
195 {
196  xt_mpi_call(MPI_Type_free(&dim_desc_dt), Xt_default_comm);
197 }
198 
199 Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions,
200  const Xt_int global_size[num_dimensions],
201  const int local_size[num_dimensions],
202  const Xt_int local_start[num_dimensions]) {
203 
204  INSTR_DEF(instr,"xt_idxsection_new")
205  INSTR_START(instr);
206  // ensure that yaxt is initialized
207  assert(xt_initialized());
208 
209  Xt_idxsection idxsection = NULL;
210  int num_indices;
211  if (num_dimensions > 0) {
212  idxsection = xmalloc(sizeof (*idxsection)
213  + (size_t)num_dimensions *
214  sizeof (idxsection->dims[0]));
215 
216  idxsection->global_start_index = start;
217  idxsection->ndim = num_dimensions;
218 
219  idxsection->index_array_cache = NULL;
220 
221  for (int i = 0; i < num_dimensions; ++i) {
222  idxsection->dims[i].global_size = global_size[i];
223  idxsection->dims[i].local_size = local_size[i];
224  idxsection->dims[i].local_start = local_start[i];
225  }
226  num_indices = idxsection_get_num_indices(idxsection);
227  } else {
228  num_indices = 0;
229  }
230  if (num_indices == 0) {
231  free(idxsection);
232  return xt_idxempty_new();
233  }
234  Xt_idxlist_init(&idxsection->parent, &idxsection_vtable, num_indices);
235  idxsection->local_start_index = start;
236  idxsection->dims[num_dimensions - 1].global_stride =
237  (Xt_int)(Xt_isign(global_size[num_dimensions - 1]) *
238  isign(local_size[num_dimensions - 1]));
239  idxsection->dims[num_dimensions - 1].local_stride = 1;
240 
241  // compute local and global stride
242  // (local stride is always positive, global stride can be negative)
243  for (int i = num_dimensions - 2; i >= 0; --i) {
244  idxsection->dims[i].global_stride
245  = (Xt_int)(idxsection->dims[i+1].global_stride * global_size[i + 1]
246  * Xt_isign((Xt_int)(idxsection->dims[i+1].global_stride
247  * global_size[i + 1]))
249  * (Xt_int)isign(local_size[i]))));
250  idxsection->dims[i].local_stride
251  = (Xt_int)(idxsection->dims[i + 1].local_stride * local_size[i + 1]
252  * Xt_isign((Xt_int)(idxsection->dims[i + 1].local_stride
253  * (Xt_int)local_size[i + 1])));
254  }
255 
256  // compute the local start index
257  // depends on global size and sign of local and global size
258  for (int i = num_dimensions - 1; i >= 0; --i) {
259  if (global_size[i] > 0)
260  idxsection->local_start_index
261  = (Xt_int)(idxsection->local_start_index
262  + (XT_INT_ABS(idxsection->dims[i].global_stride)
263  * local_start[i]));
264  else
265  idxsection->local_start_index
266  = (Xt_int)(idxsection->local_start_index
267  - (XT_INT_ABS(idxsection->dims[i].global_stride)
268  * (global_size[i] + local_start[i] + 1)));
269  if (local_size[i] < 0)
270  idxsection->local_start_index
271  = (Xt_int)(idxsection->local_start_index
272  - (XT_INT_ABS(idxsection->dims[i].global_stride)
273  * (local_size[i] + 1)
274  * Xt_isign(global_size[i])));
275  }
276 
277  // due the possibility of negative local and global sizes, the minimum and
278  // maximum can be in any corner of the n-dimensional section
279  idxsection->min_index_cache = idxsection->local_start_index;
280  idxsection->max_index_cache = idxsection->local_start_index;
281  for (int i = 0; i < num_dimensions; ++i) {
282 
283  // if either local and global size are negative
284  if ((global_size[i] < 0) ^ (local_size[i] < 0))
285  idxsection->min_index_cache
286  = (Xt_int)(idxsection->min_index_cache
287  + (Xt_int)(idxsection->dims[i].global_stride *
288  (Xt_int)(abs(local_size[i]) - 1)));
289  else // if local and global size are both positive or negative
290  idxsection->max_index_cache
291  = (Xt_int)(idxsection->max_index_cache
292  + (Xt_int)(idxsection->dims[i].global_stride *
293  (Xt_int)(abs(local_size[i]) - 1)));
294  }
295 
296  INSTR_STOP(instr);
297 
298  return (Xt_idxlist)idxsection;
299 }
300 
301 static void
303 
304  if (data == NULL) return;
305 
306  Xt_idxsection section = (Xt_idxsection)data;
307 
308  free(section->index_array_cache);
309  free(section);
310 }
311 
312 static size_t
314 
315  Xt_idxsection section = (Xt_idxsection)data;
316 
317  int size_header, size_dim_descs, size_xt_int;
318 
319  xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
320  xt_mpi_call(MPI_Pack_size(2, Xt_int_dt, comm, &size_xt_int), comm);
321  xt_mpi_call(MPI_Pack_size(section->ndim, dim_desc_dt,
322  comm, &size_dim_descs), comm);
323 
324  return (size_t)size_header + (size_t)size_dim_descs
325  + (size_t)size_xt_int;
326 }
327 
328 static void
329 idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
330  int *position, MPI_Comm comm) {
331 
332  INSTR_DEF(instr,"idxsection_pack")
333  INSTR_START(instr);
334 
335  assert(data);
336  Xt_idxsection section = (Xt_idxsection)data;
337  int header[2] = { SECTION, section->ndim };
338  Xt_int starts[2]
339  = { section->global_start_index, section->local_start_index };
340  xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
341  buffer_size, position, comm), comm);
342  xt_mpi_call(MPI_Pack(starts, 2, Xt_int_dt, buffer,
343  buffer_size, position, comm), comm);
344  xt_mpi_call(MPI_Pack(section->dims, section->ndim, dim_desc_dt,
345  buffer, buffer_size, position, comm), comm);
346  INSTR_STOP(instr);
347 }
348 
349 Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position,
350  MPI_Comm comm) {
351 
352  INSTR_DEF(instr,"xt_idxsection_unpack")
353  INSTR_START(instr);
354 
355  int ndim;
356  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, &ndim, 1, MPI_INT,
357  comm), comm);
358  Xt_idxsection section
359  = xmalloc(sizeof (*section) + (size_t)ndim * sizeof(section->dims[0]));
360  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
361  &section->global_start_index, 1, Xt_int_dt, comm),
362  comm);
363  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
364  &section->local_start_index, 1, Xt_int_dt, comm),
365  comm);
366  assert(ndim > 0);
367  section->index_array_cache = NULL;
368  section->ndim = ndim;
369 
370  xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
371  section->dims, ndim, dim_desc_dt, comm),comm);
372 
373  // due to the possibility of negative local and global sizes, the minimum and
374  // maximum can be in any corner of the n-dimensional section
375  section->min_index_cache = section->local_start_index;
376  section->max_index_cache = section->local_start_index;
377  for (int i = 0; i < ndim; ++i) {
378 
379  // if either local or global size is negative
380  if ((section->dims[i].global_size < 0) ^ (section->dims[i].local_size < 0))
381  section->min_index_cache =
382  (Xt_int)(section->min_index_cache + section->dims[i].global_stride
383  * (abs(section->dims[i].local_size) - 1));
384  else // if local and global size are both positive or negative
385  section->max_index_cache
386  = (Xt_int)(section->max_index_cache
387  + (Xt_int)(section->dims[i].global_stride
388  * (Xt_int)(abs(section->dims[i].local_size)
389  - 1)));
390  }
391  int num_indices = idxsection_get_num_indices(section);
392  Xt_idxlist_init(&section->parent, &idxsection_vtable, num_indices);
393  INSTR_STOP(instr);
394  return (Xt_idxlist)section;
395 }
396 
399  Xt_idxlist dst_idxlist) {
400 
401  // intersection between an idxsection and a general idxlist:
402  //
403  // performance picture:
404  // - src_idxsection is treated as too big for elemental transforms/access
405  // - dst_idxlist is considered to be small enough (subdomain like) for elemental usage
406 
407  INSTR_DEF(instr,"idxsection_get_intersection_with_other_idxlist")
408  INSTR_START(instr);
409 
410  int num_dst_idx = xt_idxlist_get_num_indices(dst_idxlist);
411 
412  Xt_int const* dst_idx = xt_idxlist_get_indices_const(dst_idxlist);
413  int * pos;
414  int single_match_only = 0;
415  int i, j;
416 
417  for (i = 1; i < num_dst_idx; ++i)
418  if (dst_idx[i] < dst_idx[i-1])
419  break;
420 
421  Xt_int const * sorted_dst_idx;
422  Xt_int * temp_dst_idx = NULL;
423 
424  // if the destination indices are not sorted
425  if (num_dst_idx > 1 && i != num_dst_idx) {
426 
427  temp_dst_idx = xmalloc((size_t)num_dst_idx * sizeof(*temp_dst_idx));
428  memcpy(temp_dst_idx, dst_idx, (size_t)num_dst_idx * sizeof(*temp_dst_idx));
429 
430  xt_mergesort_index(temp_dst_idx, num_dst_idx, NULL, 0);
431 
432  sorted_dst_idx = temp_dst_idx;
433  } else
434  sorted_dst_idx = dst_idx;
435 
436  pos = xmalloc((size_t)num_dst_idx * sizeof(*pos));
437  int num_unmatched = idxsection_get_positions_of_indices(
438  src_idxsection, sorted_dst_idx, num_dst_idx, pos,
439  single_match_only);
440  int num_inter_idx = num_dst_idx - num_unmatched;
441  Xt_idxlist result;
442  if (num_inter_idx != 0) {
443  Xt_int *intersection = xmalloc((size_t)num_inter_idx
444  * sizeof(*intersection));
445 
446  for(i = 0, j = 0; i < num_dst_idx && j < num_inter_idx; i++) {
447  intersection[j] = sorted_dst_idx[i];
448  j += (pos[i] >= 0);
449  }
450 
451  result = xt_idxvec_new(intersection, num_inter_idx);
452  free(intersection);
453  } else {
454  result = xt_idxempty_new();
455  }
456 
457  free(temp_dst_idx);
458  free(pos);
459 
460  INSTR_STOP(instr);
461  return result;
462  // return xt_idxvec_pinned_new(intersection, num_inter_idx);
463 }
464 
467  INSTR_DEF(instr,"idxsection_get_intersection.part")
468 
469  // both lists are index section:
470 
471  Xt_idxsection idxsection_src, idxsection_dst;
472 
473  idxsection_src = (Xt_idxsection)idxlist_src;
474  idxsection_dst = (Xt_idxsection)idxlist_dst;
475 
476  if (idxsection_src->ndim != idxsection_dst->ndim ||
477  idxsection_src->global_start_index != idxsection_dst->global_start_index)
478  return xt_default_isect(idxlist_src, idxlist_dst);
479 
480  int i;
481 
482  // the size of first global dimension is irrelevant,
483  // the others have to be identically
484  for (i = 1; i < idxsection_src->ndim; ++i)
485  if (XT_INT_ABS(idxsection_src->dims[i].global_size)
486  != XT_INT_ABS(idxsection_dst->dims[i].global_size))
488  idxlist_src, idxlist_dst);
489 
491  int *local_size;
492 
493  INSTR_START(instr);
494 
495  // dimension information for the intersection
496  local_start = xmalloc((size_t)idxsection_src->ndim * sizeof(*local_start));
497  local_size = xmalloc((size_t)idxsection_src->ndim * sizeof(*local_size));
498  global_size = xmalloc((size_t)idxsection_src->ndim * sizeof(*global_size));
499 
500  // indices in an intersection have to be sorted in ascending order. therefore,
501  // local and global sizes of the intersection have to be positive
502 
503  for (i = 0; i < idxsection_src->ndim; ++i) {
504 
505  Xt_int src_start, src_end, dst_start, dst_end, local_end;
506 
507  // the start value is the minmum position in the current dimension (with positive
508  // size)
509  // in case the global size of src or dst is negative the start value has be be
510  // adjusted accordingly
511 
512  if (idxsection_src->dims[i].global_size >= 0)
513  src_start = idxsection_src->dims[i].local_start;
514  else
515  src_start = (Xt_int)(-idxsection_src->dims[i].global_size
516  - abs(idxsection_src->dims[i].local_size)
517  - idxsection_src->dims[i].local_start);
518 
519  if (idxsection_dst->dims[i].global_size >= 0)
520  dst_start = idxsection_dst->dims[i].local_start;
521  else
522  dst_start = (Xt_int)(-idxsection_dst->dims[i].global_size
523  - abs(idxsection_dst->dims[i].local_size)
524  - idxsection_dst->dims[i].local_start);
525 
526  src_end = (Xt_int)(src_start
527  + (Xt_int)abs(idxsection_src->dims[i].local_size));
528  dst_end = (Xt_int)(dst_start
529  + (Xt_int)abs(idxsection_dst->dims[i].local_size));
530 
531  local_start[i] = (src_start > dst_start)?src_start:dst_start;
532  local_end = (src_end > dst_end)?dst_end:src_end;
533 
534  if (local_end <= local_start[i]) {
535  free(global_size);
536  free(local_size);
537  free(local_start);
538  INSTR_STOP(instr);
539  return xt_idxempty_new();
540  }
541 
542  local_size[i] = (int)(local_end - local_start[i]);
543  global_size[i] = XT_INT_ABS(idxsection_src->dims[i].global_size);
544  }
545 
546  Xt_idxlist intersection
547  = xt_idxsection_new(idxsection_src->global_start_index,
548  idxsection_src->ndim, global_size,
550 
551  free(global_size);
552  free(local_size);
553  free(local_start);
554 
555  INSTR_STOP(instr);
556  return intersection;
557 }
558 
559 static Xt_idxlist
561 
562  Xt_idxsection src = (Xt_idxsection)idxlist;
563 
564  int num_dimensions = src->ndim;
565 
566  Xt_idxsection idxsection = xmalloc(sizeof (*idxsection)
567  + (size_t)num_dimensions
568  * sizeof (idxsection->dims[0]));
569  *idxsection = *src;
570  idxsection->index_array_cache = NULL;
571 
572  memcpy(idxsection->dims, src->dims, (size_t)num_dimensions *
573  sizeof (src->dims[0]));
574 
575  return (Xt_idxlist)idxsection;
576 }
577 
578 static int
580 
581  int i;
582  long long size = 1;
583 
584  for (i = 0; i < section->ndim; ++i)
585  size *= abs(section->dims[i].local_size);
586  assert(size <= INT_MAX);
587 
588  return (int)size;
589 }
590 
591 
592 static int
594  int ndim, struct dim_desc dims[ndim])
595 {
596 
597  int abs_local_size = abs(dims[0].local_size);
598 
599  if (ndim == 1)
600  {
601  if (dims[0].global_stride > 0)
602  for (int i = 0; i < abs_local_size; ++i)
603  indices[i] = (Xt_int)(start_index + i);
604  else
605  for (int i = 0; i < abs_local_size; ++i)
606  indices[i] = (Xt_int)(start_index - i);
607  return abs_local_size;
608  }
609  else
610  {
611  int indices_written = 0, overflow = 0;
612  assert(ndim > 1);
613  for (int dim_ofs = 0; dim_ofs < abs_local_size; ++dim_ofs)
614  {
615  int indices_written_temp
617  (Xt_int)(start_index
618  + dim_ofs * dims[0].global_stride),
619  indices + indices_written,
620  ndim - 1, dims + 1);
621  overflow |= (indices_written_temp > INT_MAX - indices_written);
622  indices_written += indices_written_temp;
623  }
624  assert(!overflow);
625  return indices_written;
626  }
627 }
628 
629 static void
631  INSTR_DEF(instr,"idxsection_get_indices")
632  INSTR_START(instr);
633  Xt_idxsection section = (Xt_idxsection)idxlist;
634 
635  int num_indices = idxlist->num_indices;
636 
637  if (num_indices > 0) {
638 
639  // if the indices are already computed
640  if (section->index_array_cache != NULL) {
641  memcpy(indices, section->index_array_cache,
642  (size_t)num_indices * sizeof(*indices));
643  goto fun_exit;
644  } else
645  section->index_array_cache
646  = xmalloc((size_t)num_indices * sizeof(*(section->index_array_cache)));
647 
648  // if this routine was called by idxsection_get_indices_const
649  if (indices == NULL)
650  indices = section->index_array_cache;
651 
653  section->ndim, section->dims);
654 
655  if (section->index_array_cache != indices)
656  memcpy(section->index_array_cache, indices,
657  (size_t)num_indices * sizeof(*indices));
658  }
659  fun_exit: ;
660  INSTR_STOP(instr);
661 }
662 
663 static Xt_int const*
665 
666  Xt_idxsection idxsection = (Xt_idxsection)idxlist;
667 
668  if (idxsection->index_array_cache == NULL)
669  idxsection_get_indices(idxlist, NULL);
670 
671  return idxsection->index_array_cache;
672 }
673 
674 
675 static void
677  int * num_stripes) {
678 
679  INSTR_DEF(instr,"idxsection_get_index_stripes.part")
680 
681  Xt_idxsection section = (Xt_idxsection)idxlist;
682 
683  int ndim = section->ndim;
684  struct dim_desc *restrict dims = section->dims;
685 
686  size_t nstripes = dims[ndim-1].local_size != 0;
687 
688  for (int i = 0; i < ndim-1; ++i)
689  nstripes *= (size_t)abs(dims[i].local_size);
690 
691  if (nstripes == 0) {
692  *num_stripes = (int)nstripes;
693  return;
694  }
695 
696  INSTR_START(instr);
697 
698  struct Xt_stripe *restrict p = *stripes;
699  if ((size_t)*num_stripes < nstripes)
700  p = xrealloc(p, nstripes * sizeof(**stripes));
701 
702  enum { curr_local_position_auto_size=16 };
703  Xt_int curr_local_position_auto[curr_local_position_auto_size];
704  Xt_int *restrict curr_local_position;
705  if (ndim-2 <= curr_local_position_auto_size) {
706  curr_local_position = curr_local_position_auto;
707  for (int i = 0; i < ndim-1; ++i)
708  curr_local_position[i] = 0;
709  } else
710  curr_local_position
711  = xcalloc((size_t)(ndim-2), sizeof(*curr_local_position));
712 
713  for (size_t i = 0; i < nstripes; ++i) {
714 
715  p[i].start = section->local_start_index;
716  p[i].nstrides = abs(dims[ndim-1].local_size);
717  p[i].stride = 1;
718 
719  for (int j = 0; j < ndim - 1; ++j)
720  p[i].start = (Xt_int)(p[i].start
721  + curr_local_position[j]
722  * dims[j].global_stride);
723 
724  for (int j = ndim - 2; j >= 0; --j)
725  if (curr_local_position[j] < abs(dims[j].local_size) - 1) {
726  curr_local_position[j]++;
727  break;
728  } else
729  curr_local_position[j] = 0;
730  }
731  *stripes = p;
732  *num_stripes = (int)nstripes;
733  if (curr_local_position != curr_local_position_auto)
734  free(curr_local_position);
735 
736  INSTR_STOP(instr);
737 }
738 
739 static int
741  Xt_int * index) {
742 
743  Xt_idxsection section = (Xt_idxsection)idxlist;
744 
745  if (position < 0) return 1;
746 
747  Xt_int temp_index;
748 
749  temp_index = section->local_start_index;
750 
751  int dim;
752  Xt_int curr_local_position;
753  long long pos = (long long)position;
754 
755  for (dim = 0; dim < section->ndim; ++dim) {
756 
757  curr_local_position = (Xt_int)(pos / (long long)section->dims[dim].local_stride);
758 
759  if (curr_local_position >= abs(section->dims[dim].local_size))
760  return 1;
761 
762  temp_index = (Xt_int)(temp_index
763  + curr_local_position
764  * section->dims[dim].global_stride);
765  /* FIXME: assert(section->dims[dim].local_stride is in [-INT_MAX,INT_MAX]) */
766  pos %= (long long)section->dims[dim].local_stride;
767  }
768 
769  *index = temp_index;
770 
771  return 0;
772 }
773 
774 static int
776  int * position) {
777 
778  INSTR_DEF(instr,"idxsection_get_position_of_index.part")
779 
780  Xt_idxsection section = (Xt_idxsection)idxlist;
781  *position = -1;
782 
783  if (index < section->min_index_cache || index > section->max_index_cache)
784  return 1;
785 
786  int retval = 1;
787 
788  INSTR_START(instr);
789 
790  // normalise index (global start of indices at 0)
791  index = (Xt_int)(index - section->global_start_index);
792 
793  int i;
794  int temp_position = 0;
795 
796  for (i = 0; i < section->ndim; ++i) {
797 
798  Xt_int abs_global_stride
799  = XT_INT_ABS(section->dims[i].global_stride);
800 
801  Xt_int curr_global_position
802  = (Xt_int)(index / abs_global_stride);
803 
804  // in case the global size is negative, we have to adjust the global position,
805  // because the ordering of indices in this dimension is inverted
806  if (section->dims[i].global_size < 0)
807  curr_global_position
808  = (Xt_int)(-section->dims[i].global_size - curr_global_position - 1);
809 
810  index = (Xt_int)(index % abs_global_stride);
811 
812  if (curr_global_position < section->dims[i].local_start)
813  goto fun_exit;
814 
815  Xt_int curr_local_position
816  = (Xt_int)(curr_global_position - section->dims[i].local_start);
817 
818  // same adjustment for local position as for the global one before
819  if (section->dims[i].local_size < 0)
820  curr_local_position
821  = (Xt_int)(-section->dims[i].local_size - curr_local_position - 1);
822 
823  if (curr_local_position >= abs(section->dims[i].local_size))
824  goto fun_exit;
825 
826  temp_position += (int)(curr_local_position * section->dims[i].local_stride);
827  }
828 
829  *position = temp_position;
830 
831  retval = 0;
832 
833  fun_exit: ;
834  INSTR_STOP(instr);
835  return retval;
836 }
837 
838 static int
840  const Xt_int selection_idx[],
841  int num_selection,
842  int positions[],
843  int single_match_only) {
844 
845  INSTR_DEF(instr,"idxsection_get_positions_of_indices_v1.part")
846 
847  if (num_selection < 1) return 0; //nothing to do
848 
849  if (num_selection == 1)
850  return idxsection_get_position_of_index(body_idxlist, *selection_idx,
851  positions);
852 
853  int num_unmatched = 0;
854 
855  if (!single_match_only) {
856  // this is the easy case, we don't care about multiple uses of the same position
857  for (int i = 0; i < num_selection; ++i)
858  num_unmatched
859  += idxsection_get_position_of_index(body_idxlist, selection_idx[i],
860  &positions[i]);
861  return num_unmatched;
862  }
863 
864  INSTR_START(instr);
865 
866  for (size_t i = 1; i < (size_t)num_selection; ++i)
867  if (selection_idx[i] < selection_idx[i-1])
868  goto unsorted;
869 
870  // indices are sorted
871  {
872  // we need an index that is different from the current one
873  Xt_int prev_index = (Xt_int)(selection_idx[0] - 1);
874 
875  for (size_t i = 0; i < (size_t)num_selection; i++) {
876 
877  Xt_int curr_index = selection_idx[i];
878 
879  if (prev_index != curr_index) {
880 
881  num_unmatched
882  += idxsection_get_position_of_index(body_idxlist, curr_index,
883  positions + i);
884  prev_index = curr_index;
885 
886  } else {
887  // for an idxsection there is a unique map from indices to positions,
888  // we got the same index again, so there is no match left:
889  positions[i] = -1;
890  num_unmatched++;
891  }
892  }
893  }
894  goto end;
895  // indices are not sorted
896 unsorted:
897  {
898  // the remaining (single_match_only) case follows:
899  idxpos_type *v = xmalloc((size_t)num_selection * sizeof(*v) );
900  for (size_t i = 0; i < (size_t)num_selection; i++) {
901  v[i].idx = selection_idx[i];
902  v[i].pos = (int)i;
903  }
904  xt_mergesort_idxpos(v, (size_t)num_selection);
905  Xt_int last_jx = (Xt_int)(v[0].idx - 1); // any index that does not equal v[0].idx will do
906  for (size_t i = 0; i < (size_t)num_selection; i++) {
907  int j = v[i].pos;
908  Xt_int jx = v[i].idx;
909  if (jx != last_jx) {
910  num_unmatched
911  += idxsection_get_position_of_index(body_idxlist, jx, &positions[j]);
912  last_jx = jx;
913  } else {
914  // for an idxsection there is a unique map from indices to positions,
915  // we got the same index again, so there is no match left:
916  positions[j] = -1;
917  num_unmatched++;
918  }
919  }
920  free(v);
921  }
922 end:
923  INSTR_STOP(instr);
924  return num_unmatched;
925 }
926 
927 static
929  const Xt_int selection_idx[],
930  int num_selection, int positions[],
931  int single_match_only) {
932 
933  INSTR_DEF(instr,"idxsection_get_positions_of_indices_v2.part")
934 
935  if (num_selection < 1) return 0; //nothing to do
936 
937  if (num_selection == 1)
938  return idxsection_get_position_of_index(body_idxlist, *selection_idx, positions);
939 
940  INSTR_START(instr);
941 
942  Xt_int * temp_selection_idx = NULL;
943  const Xt_int *restrict sorted_selection_idx;
944  int * selection_pos = NULL;
945 
946  size_t i;
947  for (i = 1; i < (size_t)num_selection; ++i)
948  if (selection_idx[i] < selection_idx[i-1])
949  goto unsorted;
950 
951  sorted_selection_idx = selection_idx;
952  goto sorted;
953 unsorted:
954  // the indices are not sorted
955  temp_selection_idx
956  = xmalloc((size_t)num_selection * sizeof(*temp_selection_idx));
957  memcpy(temp_selection_idx, selection_idx,
958  (size_t)num_selection * sizeof(*temp_selection_idx));
959  selection_pos = xmalloc((size_t)num_selection * sizeof(*selection_pos));
960 
961  xt_quicksort_index(temp_selection_idx, num_selection, selection_pos, 1);
962  sorted_selection_idx = temp_selection_idx;
963 
964 sorted: ;
965  const Xt_int *body_indices = idxsection_get_indices_const(body_idxlist);
966  size_t num_body_indices = (size_t)xt_idxlist_get_num_indices(body_idxlist);
967 
968  // Xt_int last_idx = sorted_selection_idx[0] - 1;
969  //
970  // for (i = 0, j = 0; i < num_selection && j < num_body_indices; ++i) {
971  //
972  // while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
973  //
974  // if (j >= num_body_indices) break;
975  //
976  // if (!single_match_only)
977  // positions[(selection_pos == NULL)?i:selection_pos[i]] =
978  // (body_indices[j] == sorted_selection_idx[i])?j:-1;
979  // else
980  // positions[selection_pos[i]] =
981  // ((last_idx == sorted_selection_idx[i]) ||
982  // (body_indices[j] != sorted_selection_idx[i]))?-1:j;
983  // }
984 
985  // the following loops are an unrolled version of the one above
986 
987  if (!single_match_only) {
988 
989  if (selection_pos == NULL) {
990  i = 0;
991  for (size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
992 
993  while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
994 
995  if (j >= num_body_indices) break;
996 
997  positions[i] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
998  }
999  } else {
1000  i = 0;
1001  for (size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1002 
1003  while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1004 
1005  if (j >= num_body_indices) break;
1006 
1007  positions[selection_pos[i]] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
1008  }
1009  }
1010  } else {
1011 
1012  Xt_int last_idx = (Xt_int)(sorted_selection_idx[0] - 1);
1013 
1014  if (selection_pos == NULL) {
1015  i = 0;
1016  for (size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1017 
1018  while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1019 
1020  if (j >= num_body_indices) break;
1021 
1022  positions[i] = ((last_idx == sorted_selection_idx[i]) ||
1023  (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1024 
1025  last_idx = sorted_selection_idx[i];
1026  }
1027  } else {
1028  i = 0;
1029  for (size_t j = 0; i < (size_t)num_selection && j < num_body_indices; ++i) {
1030 
1031  while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1032 
1033  if (j >= num_body_indices) break;
1034 
1035  positions[selection_pos[i]] = ((last_idx == sorted_selection_idx[i]) ||
1036  (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1037 
1038  last_idx = sorted_selection_idx[i];
1039  }
1040  }
1041  }
1042 
1043  // process indices that were not handled by the loop above
1044  if (selection_pos == NULL)
1045  for (; i < (size_t)num_selection; ++i)
1046  positions[i] = -1;
1047  else
1048  for (; i < (size_t)num_selection; ++i)
1049  positions[selection_pos[i]] = -1;
1050 
1051  free(temp_selection_idx);
1052  free(selection_pos);
1053 
1054  int num_unmatched = 0;
1055 
1056  // count the number of unmachted indices
1057  for (size_t j = 0; j < (size_t)num_selection; ++j)
1058  num_unmatched += positions[j] == -1;
1059 
1060  INSTR_STOP(instr);
1061  return num_unmatched;
1062 }
1063 
1064 static size_t
1066  int position_offset,
1067  const Xt_int indices[],
1068  size_t num_indices,
1069  int positions[],
1070  int ndim,
1071  struct dim_desc dims[ndim])
1072 {
1073  size_t num_processed = 0;
1074 
1075  Xt_int abs_global_size = XT_INT_ABS(dims[0].global_size);
1076  int abs_local_size = abs(dims[0].local_size);
1077  Xt_int abs_global_stride = XT_INT_ABS(dims[0].global_stride);
1078  Xt_int abs_local_stride = XT_INT_ABS(dims[0].local_stride);
1079 
1080  if (ndim == 1)
1081  {
1082 
1083  Xt_int curr_position;
1084 
1085  Xt_int tmp_local_start = dims[0].local_start;
1086 
1087  // we want to work on ascending indices in the lowest dimension -> have to
1088  // adjust in case of negative global size
1089  if (dims[0].global_size < 0)
1090  tmp_local_start = (Xt_int)(-dims[0].global_size - tmp_local_start -
1091  abs_local_size);
1092 
1093  Xt_int min_index = (Xt_int)(index_offset + tmp_local_start * abs_global_stride);
1094 
1095  // set all indices that are smaller than the minimum to "not found"
1096  while ((num_processed < num_indices)
1097  && (indices[num_processed] < min_index))
1098  positions[num_processed++] = -1;
1099 
1100  // if either the local or the global dimension is negative
1101  if ((dims[0].global_stride < 0) ^ (dims[0].local_stride < 0)) {
1102 
1103  // for as long as we are in the range local section of the current
1104  // global dimension
1105  while ((num_processed < num_indices) &&
1106  ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1107  abs_local_size)) {
1108 
1109  positions[num_processed++] = position_offset
1110  + (int)(abs_local_size - curr_position - 1);
1111  }
1112  } else { // if the local and global dimension are both negative or positive
1113 
1114  // for as long as we are in the range local section of the current
1115  // global dimension
1116  while ((num_processed < num_indices) &&
1117  ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1118  abs_local_size)) {
1119 
1120  positions[num_processed++] = position_offset + (int)curr_position;
1121  }
1122  }
1123 
1124  // for all remaining indices that are in the current global dimension but not
1125  // within the local section
1126  while ((num_processed < num_indices) &&
1127  (indices[num_processed] < index_offset + abs_global_size))
1128  positions[num_processed++] = -1;
1129 
1130  } else {
1131 
1132  assert(ndim > 1);
1133 
1134  Xt_int tmp_local_start = dims[0].local_start;
1135 
1136  // we want to work on ascending indices in the lowest dimension -> have to
1137  // adjust in case of negative global size
1138  if (dims[0].global_size < 0)
1139  tmp_local_start = (Xt_int)(-dims[0].global_size - tmp_local_start -
1140  abs_local_size);
1141 
1142  Xt_int min_index
1143  = (Xt_int)(index_offset + tmp_local_start * abs_global_stride);
1144 
1145  // set all indices that are smaller than the minimum to "not found"
1146  while ((num_processed < num_indices)
1147  && (indices[num_processed] < min_index))
1148  positions[num_processed++] = -1;
1149 
1150  // while there are indices that have not yet been processed
1151  while (num_processed < num_indices) {
1152 
1153  Xt_int curr_global_position, curr_local_position;
1154 
1155  // compute global position of the smallest index that has not yet been processed
1156  curr_global_position = (Xt_int)((indices[num_processed] - index_offset) /
1157  abs_global_stride);
1158 
1159  // if the position is outside of the range of the current dimension
1160  if (curr_global_position >= tmp_local_start + abs_local_size)
1161  break;
1162 
1163  // if either the local or the global dimension is negative
1164  if ((dims[0].global_size < 0) ^ (dims[0].local_size < 0))
1165 
1166  curr_local_position = (Xt_int)(abs_local_size - curr_global_position
1167  + tmp_local_start - 1);
1168  else // if the local and global dimension are both negative or positive
1169  curr_local_position = (Xt_int)(curr_global_position - tmp_local_start);
1170 
1171  Xt_int curr_index_offset
1172  = (Xt_int)(index_offset + curr_global_position * abs_global_stride);
1173  /* FIXME: no guarantee curr_local_position * abs_local_stride
1174  * <= INT_MAX */
1175  int position_offset_ = (int)(curr_local_position * abs_local_stride);
1176 
1178  curr_index_offset, position_offset_, indices + num_processed,
1179  num_indices - num_processed, positions + num_processed, ndim-1,
1180  dims + 1);
1181  }
1182  }
1183 
1184  return num_processed;
1185 }
1186 
1187 static int
1189  const Xt_int *restrict selection_idx,
1190  int num_selection,
1191  int *restrict positions,
1192  int single_match_only) {
1193 
1194  INSTR_DEF(instr,"idxsection_get_positions_of_indices_v3.part")
1195  INSTR_DEF(instr2,"idxsection_get_positions_of_indices_recursive")
1196 
1197  Xt_idxsection section = (Xt_idxsection)body_idxlist;
1198 
1199  if (num_selection < 1) return 0; //nothing to do
1200 
1201  if (num_selection == 1)
1202  return idxsection_get_position_of_index(body_idxlist, *selection_idx, positions);
1203 
1204  INSTR_START(instr);
1205 
1206  const Xt_int * restrict sorted_selection_idx;
1207  Xt_int *temp_selection_idx = NULL;
1208  int *sorted_positions;
1209  int *selection_pos = NULL;
1210 
1211  for (size_t i = 1; i < (size_t)num_selection; ++i)
1212  if (selection_idx[i] < selection_idx[i-1])
1213  goto unsorted_selection;
1214 
1215  sorted_selection_idx = selection_idx;
1216  sorted_positions = positions;
1217  goto sorted_selection;
1218  // if the selection is not sorted
1219 unsorted_selection:
1220  temp_selection_idx
1221  = xmalloc((size_t)num_selection * sizeof(*temp_selection_idx));
1222  {
1223  size_t num_sp_alloc = (size_t)num_selection;
1224 #if defined _CRAYC && _RELEASE_MAJOR < 9
1225  num_sp_alloc = (num_sp_alloc + _MAXVL_32 - 1) & ~(_MAXVL_32 - 1);
1226 #endif
1227  size_t total_alloc = num_sp_alloc + (size_t)num_selection;
1228  sorted_positions
1229  = xmalloc(total_alloc * sizeof(*sorted_positions));
1230  selection_pos = sorted_positions + num_sp_alloc;
1231  }
1232  memcpy(temp_selection_idx, selection_idx,
1233  (size_t)num_selection * sizeof(*temp_selection_idx));
1234 
1235  xt_quicksort_index(temp_selection_idx, num_selection, selection_pos, 1);
1236  sorted_selection_idx = temp_selection_idx;
1237 sorted_selection:
1238 
1239  INSTR_START(instr2);
1240 
1241  size_t num_processed
1243  section->global_start_index,
1244  0, sorted_selection_idx, (size_t)num_selection,
1245  sorted_positions, section->ndim,
1246  section->dims);
1247 
1248  INSTR_STOP(instr2);
1249 
1250  // set remaining index positions to -1
1251  for (size_t i = num_processed; i < (size_t)num_selection; ++i)
1252  sorted_positions[i] = -1;
1253 
1254  // apply single match only rule
1255  if (single_match_only)
1256  for (size_t i = 1; i < num_processed; ++i)
1257  if (sorted_selection_idx[i] == sorted_selection_idx[i-1])
1258  sorted_positions[i] = -1;
1259 
1260  // convert positions if unsorted
1261  if (sorted_selection_idx != selection_idx) {
1262 
1263  for (size_t i = 0; i < (size_t)num_selection; ++i)
1264  positions[i] = sorted_positions[selection_pos[i]];
1265 
1266  free(sorted_positions);
1267  free(temp_selection_idx);
1268  }
1269 
1270  // count the number of unmached indices
1271  size_t num_unmatched = (size_t)num_selection - num_processed;
1272 
1273  for (size_t i = 0; i < num_processed; ++i)
1274  num_unmatched += positions[i] == -1;
1275 
1276  INSTR_STOP(instr);
1277 
1278  return (int)num_unmatched;
1279 }
1280 
1281 static int
1283  const Xt_int *selection_idx,
1284  int num_selection, int * positions,
1285  int single_match_only) {
1286 
1287  INSTR_DEF(instr,"idxsection_get_positions_of_indices")
1288  Xt_idxsection section = (Xt_idxsection)body_idxlist;
1289  int retval = 0;
1290 
1291 
1292  INSTR_START(instr);
1293 
1294  // if any dimension of the body index list is negative we have to use the
1295  // v3 version, because the other version cannot handle negative sizes
1296  for (int i = 0; i < section->ndim; ++i)
1297  if ((section->dims[i].local_size < 0) || (section->dims[i].global_size < 0)) {
1298  retval = idxsection_get_positions_of_indices_v3(body_idxlist, selection_idx,
1299  num_selection, positions,
1300  single_match_only);
1301  goto fun_exit;
1302  }
1303 
1304  {
1305  int num_section_indices = xt_idxlist_get_num_indices(body_idxlist);
1306 
1307  /*
1308  * if the indices are already cached or (if the caching would not
1309  * consume too much memory and the number of selection indices are
1310  * sufficient to justify the use of cached indices)
1311  */
1312  if ((section->index_array_cache != NULL) ||
1313  (((size_t)num_section_indices * sizeof(Xt_int)
1314  <= (size_t)128 * 1024U * 1024U)
1315  && ((Xt_int)num_section_indices <= 1000 * num_selection))) {
1316  retval = idxsection_get_positions_of_indices_v2(body_idxlist, selection_idx,
1317  num_selection, positions,
1318  single_match_only);
1319  goto fun_exit;
1320  }
1321  else {
1322  retval = idxsection_get_positions_of_indices_v1(body_idxlist, selection_idx,
1323  num_selection, positions,
1324  single_match_only);
1325  goto fun_exit;
1326  }
1327  }
1328 
1329  fun_exit: ;
1330  INSTR_STOP(instr);
1331  return retval;
1332 }
1333 
1334 static int
1336  int * position, int offset) {
1337 
1338  int temp_position;
1339  // we make use of the uniqueness of the index-to-position relation:
1340  if (idxsection_get_position_of_index(idxlist, index, &temp_position))
1341  return 1;
1342 
1343  if (temp_position < offset)
1344  return 1;
1345 
1346  *position = temp_position;
1347 
1348  return 0;
1349 }
1350 
1351 static Xt_int
1353 
1354  Xt_idxsection section = (Xt_idxsection)idxlist;
1355  return section->min_index_cache;
1356 }
1357 
1358 static Xt_int
1360 
1361  Xt_idxsection section = (Xt_idxsection)idxlist;
1362  return section->max_index_cache;
1363 }
1364 
1365 /*
1366  * Local Variables:
1367  * c-basic-offset: 2
1368  * coding: utf-8
1369  * indent-tabs-mode: nil
1370  * show-trailing-whitespace: t
1371  * require-trailing-newline: t
1372  * End:
1373  */
int MPI_Comm
Definition: core.h:64
#define INSTR_STOP(T)
Definition: instr.h:69
#define INSTR_DEF(T, S)
Definition: instr.h:66
#define INSTR_START(T)
Definition: instr.h:68
void xt_mergesort_index(Xt_int *val, int n, int *pos, int reset_pos)
Definition: mergesort.c:195
void xt_mergesort_idxpos(idxpos_type *restrict v, size_t n)
Definition: mergesort.c:186
merge sort declaration
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition: ppm_xfuncs.h:71
#define xcalloc(nmemb, size)
Definition: ppm_xfuncs.h:68
#define xmalloc(size)
Definition: ppm_xfuncs.h:70
void xt_quicksort_index(Xt_int *v_idx, int n, int *v_pos, int reset_pos)
Definition: quicksort.c:80
quicksort declaration
Xt_int * index_array_cache
Xt_int global_start_index
struct Xt_idxlist_ parent
Xt_int local_start_index
Xt_int min_index_cache
Xt_int max_index_cache
struct dim_desc dims[]
Xt_int start
Definition: xt_stripe.h:55
Xt_int global_size
Xt_int local_start
Xt_int global_stride
Xt_int local_stride
int local_size
void(* delete)(Xt_idxlist)
static int isign(int x)
static Xt_int Xt_isign(Xt_int x)
int xt_initialized(void)
Definition: xt_init.c:107
#define Xt_int_dt
Definition: xt_core.h:69
XT_INT Xt_int
Definition: xt_core.h:68
Xt_idxlist xt_idxempty_new(void)
Definition: xt_idxempty.c:165
index list declaration
int xt_idxlist_get_num_indices(Xt_idxlist idxlist)
Definition: xt_idxlist.c:98
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
Definition: xt_idxlist.c:108
Provide non-public declarations common to all index lists.
Xt_idxlist xt_default_isect(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ SECTION
static int idxsection_get_positions_of_indices(Xt_idxlist body_idxlist, Xt_int const *selection_idx, int num_selection, int *positions, int single_match_only)
Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions, const Xt_int global_size[num_dimensions], const int local_size[num_dimensions], const Xt_int local_start[num_dimensions])
Xt_idxlist xt_idxsection_get_intersection_with_other_idxlist(Xt_idxlist src_idxsection, Xt_idxlist dst_idxlist)
static Xt_idxlist idxsection_copy(Xt_idxlist idxlist)
static int idxsection_get_positions_of_indices_v3(Xt_idxlist body_idxlist, const Xt_int *restrict selection_idx, int num_selection, int *restrict positions, int single_match_only)
Xt_idxlist xt_idxsection_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst)
static const struct xt_idxlist_vtable idxsection_vtable
void xt_idxsection_initialize(void)
static MPI_Datatype dim_desc_dt
static void idxsection_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
void xt_idxsection_finalize(void)
static int idxsection_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static int idxsection_get_positions_of_indices_v2(Xt_idxlist body_idxlist, const Xt_int selection_idx[], int num_selection, int positions[], int single_match_only)
static Xt_int idxsection_get_max_index(Xt_idxlist idxlist)
static void idxsection_delete(Xt_idxlist data)
Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static int idxsection_get_indices_any(Xt_int start_index, Xt_int *indices, int ndim, struct dim_desc dims[ndim])
static int idxsection_get_num_indices(Xt_idxsection section)
static int idxsection_get_positions_of_indices_v1(Xt_idxlist body_idxlist, const Xt_int selection_idx[], int num_selection, int positions[], int single_match_only)
static void idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxsection_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static size_t idxsection_get_positions_of_indices_recursive(Xt_int index_offset, int position_offset, const Xt_int indices[], size_t num_indices, int positions[], int ndim, struct dim_desc dims[ndim])
static int idxsection_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static int idxsection_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
struct Xt_idxsection_ * Xt_idxsection
static Xt_int idxsection_get_min_index(Xt_idxlist idxlist)
static void idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static const Xt_int * idxsection_get_indices_const(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_new(const Xt_int *idxlist, int num_indices)
Definition: xt_idxvec.c:163
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition: xt_mpi.h:68