Yet Another eXchange Tool 0.11.3
xt_idxsection.c
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include <config.h>
48#endif
49
50#include <assert.h>
51#include <limits.h>
52#include <stdlib.h>
53#include <stdio.h>
54#include <string.h>
55
56#include "xt_arithmetic_util.h"
57#include "xt_arithmetic_long.h"
58#include "xt/xt_idxlist.h"
59#include "xt_idxlist_internal.h"
60#include "xt/xt_idxempty.h"
61#include "xt/xt_idxvec.h"
62#include "xt/xt_idxsection.h"
64#include "xt/xt_mpi.h"
65#include "xt/xt_sort.h"
66#include "xt_idxlist_unpack.h"
67#include "core/ppm_xfuncs.h"
68#include "core/core.h"
69#include "instr.h"
70#include "xt_config_internal.h"
71#include "xt_stripe_util.h"
72#include "xt_idxvec_internal.h"
74
75static void
76idxsection_delete(Xt_idxlist data);
77
78static size_t
79idxsection_get_pack_size(Xt_idxlist data, MPI_Comm comm);
80
81static void
82idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
83 int *position, MPI_Comm comm);
84
85static Xt_idxlist
86idxsection_copy(Xt_idxlist idxlist);
87
88static Xt_idxlist
89idxsection_sorted_copy(Xt_idxlist idxlist, Xt_config config);
90
91static void
92idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices);
93
94static const Xt_int *
95idxsection_get_indices_const(Xt_idxlist idxlist);
96
97static int
98idxsection_get_num_index_stripes(Xt_idxlist idxlist);
99
100static void
101idxsection_get_index_stripes(Xt_idxlist idxlist,
102 struct Xt_stripe *restrict stripes,
103 size_t num_stripes_alloc);
104
105static int
106idxsection_get_index_at_position(Xt_idxlist idxlist, int position,
107 Xt_int * index);
108
109static int
110idxsection_get_position_of_index(Xt_idxlist idxlist, Xt_int index,
111 int * position);
112
113static int
114idxsection_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index,
115 int * position, int offset);
116static size_t
117idxsection_get_positions_of_indices(Xt_idxlist body_idxlist,
118 Xt_int const *selection_idx,
119 size_t num_selection, int *positions,
120 int single_match_only);
121
122static Xt_int
123idxsection_get_min_index(Xt_idxlist idxlist);
124
125static Xt_int
126idxsection_get_max_index(Xt_idxlist idxlist);
127
128static int
129idxsection_get_sorting(Xt_idxlist idxlist);
130
131static const struct xt_idxlist_vtable idxsection_vtable = {
132 .delete = idxsection_delete,
133 .get_pack_size = idxsection_get_pack_size,
134 .pack = idxsection_pack,
135 .copy = idxsection_copy,
136 .sorted_copy = idxsection_sorted_copy,
137 .get_indices = idxsection_get_indices,
138 .get_indices_const = idxsection_get_indices_const,
139 .get_num_index_stripes = idxsection_get_num_index_stripes,
140 .get_index_stripes = idxsection_get_index_stripes,
141 .get_index_at_position = idxsection_get_index_at_position,
142 .get_indices_at_positions = NULL,
143 .get_position_of_index = idxsection_get_position_of_index,
144 .get_positions_of_indices = idxsection_get_positions_of_indices,
145 .get_position_of_index_off = idxsection_get_position_of_index_off,
146 .get_positions_of_indices_off = NULL,
147 .get_min_index = idxsection_get_min_index,
148 .get_max_index = idxsection_get_max_index,
149 .get_sorting = idxsection_get_sorting,
150 .get_bounding_box = NULL,
151 .idxlist_pack_code = SECTION,
152};
153
154/* descriptor for per-dimension extent and stride */
155struct dim_desc
156{
157  Xt_int global_size, local_start, global_stride;
158#ifdef XT_LONG
159 struct Xt_muldiv agsmd; /* multiplicative inverse stored for
160 * fast division by absolute global stride */
161#endif
162  int local_size, local_stride;
163};
164
165static MPI_Datatype dim_desc_dt;
166
167
169
185
186static int
187idxsection_get_num_indices(Xt_idxsection section);
188
189#if __GNUC__ >= 11
190__attribute__ ((access (none, 1)))
191int MPI_Get_address(XT_MPI_GET_ADDRESS_LOCATION_CONST void *location, MPI_Aint *address);
192#endif
193
194void
195xt_idxsection_initialize(void)
196{
197 struct dim_desc dim_desc;
198
199 MPI_Aint base_address, local_size_address;
200
201 MPI_Get_address(&dim_desc, &base_address);
202 MPI_Get_address(&dim_desc.local_size, &local_size_address);
203
204 enum { num_dt_components = 2 };
205 static const int block_lengths[num_dt_components] = { 2, 1 };
206 MPI_Aint displacements[num_dt_components]
207 = {0, local_size_address - base_address };
208 static const MPI_Datatype types[num_dt_components]
209 = { Xt_int_dt, MPI_INT };
210 MPI_Datatype dim_desc_dt_unaligned;
211#ifdef XT_MPI_TYPE_CREATE_STRUCT_CONST_ARRAY_ARGS
212 xt_mpi_call(MPI_Type_create_struct(num_dt_components,
213 block_lengths, displacements, types,
214 &dim_desc_dt_unaligned), Xt_default_comm);
215#else
216 xt_mpi_call(MPI_Type_create_struct(num_dt_components,
217 (int *)block_lengths, displacements,
218 (MPI_Datatype *)types,
219 &dim_desc_dt_unaligned), Xt_default_comm);
220#endif
221 xt_mpi_call(MPI_Type_create_resized(dim_desc_dt_unaligned, 0,
222 (MPI_Aint)sizeof(dim_desc),
223 &dim_desc_dt), Xt_default_comm);
224 xt_mpi_call(MPI_Type_free(&dim_desc_dt_unaligned), Xt_default_comm);
225 xt_mpi_call(MPI_Type_commit(&dim_desc_dt), Xt_default_comm);
226}
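/* Note added for clarity (not part of the original source): the datatype
 * constructed above transmits only the two leading Xt_int members of
 * struct dim_desc plus the int local_size; the strides and, with XT_LONG,
 * the agsmd helper are recomputed by setup_dims() after unpacking.  Resizing
 * the extent to sizeof(struct dim_desc) lets whole dims[] arrays be packed
 * and unpacked with a single MPI call, as done in idxsection_pack() and
 * xt_idxsection_unpack() below. */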
227
228void
229xt_idxsection_finalize(void)
230{
231 xt_mpi_call(MPI_Type_free(&dim_desc_dt), Xt_default_comm);
232}
233
234static struct Xt_minmax
235get_section_minmax(size_t num_dimensions,
236 Xt_int local_start_index,
237 const struct dim_desc dims[num_dimensions])
238{
239  // due to the possibility of negative local and global sizes, the minimum and
240 // maximum can be in any corner of the n-dimensional section
241 Xt_int max = local_start_index, min = local_start_index;
242 for (size_t i = 0; i < num_dimensions; ++i) {
243    // if either the local or the global size is negative
244 if (dims[i].global_stride < 0)
245 min = (Xt_int)(min + (Xt_int)(dims[i].global_stride *
246 (Xt_int)(abs(dims[i].local_size) - 1)));
247    else // if local and global size are both positive or both negative
248 max = (Xt_int)(max + (Xt_int)(dims[i].global_stride *
249 (Xt_int)(abs(dims[i].local_size) - 1)));
250 }
251 return (struct Xt_minmax){ .min = min, .max = max };
252}
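/* Worked example (added for illustration, not part of the original source):
 * a 1-d section over a global dimension of size 8 with local_size -3,
 * local_start 1 and start index 0 gets global_stride -1 and
 * local_start_index 3 from setup_dims() below, so it enumerates the indices
 * 3, 2, 1.  Because the stride is negative, the minimum (1) sits at the end
 * of the enumeration while the maximum (3) equals the start index, which is
 * exactly the per-dimension case distinction made above. */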
253
254
255struct section_aggregate {
256  Xt_int start, min, max;
257 unsigned flags;
258};
259
263static struct section_aggregate
264setup_dims(Xt_int start, size_t num_dimensions,
265 struct dim_desc dims[num_dimensions])
266{
267 int asc_dim = 0, dsc_dim = 0;
268 for (size_t i = 0; i < num_dimensions; ++i) {
269 /* are indices enumerated in reverse order along this dimension? */
270 int goes_rev_order = (dims[i].global_size < 0) ^ (dims[i].local_size < 0);
271 asc_dim += !goes_rev_order;
272 dsc_dim += goes_rev_order;
273 }
274 unsigned flags = XT_SORT_FLAGS(asc_dim, dsc_dim);
275 dims[num_dimensions - 1].global_stride =
276 (Xt_int)(Xt_isign(dims[num_dimensions - 1].global_size) *
277 isign(dims[num_dimensions - 1].local_size));
278#ifdef XT_LONG
279 dims[num_dimensions - 1].agsmd
280 = xt_get_mulinv(XT_INT_ABS(dims[num_dimensions - 1].global_stride));
281#endif
282 dims[num_dimensions - 1].local_stride = 1;
283
284 // compute local and global stride
285 // (local stride is always positive, global stride can be negative)
286 for (size_t i = num_dimensions - 2; i != (size_t)-1; --i) {
287 dims[i].global_stride
288 = (Xt_int)(XT_INT_ABS(dims[i+1].global_stride * dims[i+1].global_size)
289 * Xt_isign(dims[i].global_size)
290 * isign(dims[i].local_size));
291 dims[i].local_stride
292 = abs(dims[i+1].local_stride * dims[i+1].local_size);
293#ifdef XT_LONG
294 dims[i].agsmd
295 = xt_get_mulinv(XT_INT_ABS(dims[i].global_stride));
296#endif
297 }
298
299 // compute the local start index
300 // depends on global size and sign of local and global size
301 Xt_int local_start_index = start;
302 for (size_t i = num_dimensions - 1; i != (size_t)-1; --i) {
303 Xt_int adj_gstride
304 = (Xt_int)(dims[i].global_stride * isign(dims[i].local_size)),
305 start_ofs
306 = (Xt_int)(dims[i].global_size > 0 ? 0 : dims[i].global_size + 1);
307 local_start_index
308 = (Xt_int)(local_start_index
309 + (adj_gstride * (start_ofs + dims[i].local_start)));
310 if (dims[i].local_size < 0)
311 local_start_index
312 = (Xt_int)(local_start_index
313 + (dims[i].global_stride * (dims[i].local_size + 1)));
314 }
315 struct Xt_minmax mm = get_section_minmax(num_dimensions, local_start_index, dims);
316 return (struct section_aggregate){ .start = local_start_index,
317 .max = mm.max, .min = mm.min,
318 .flags = flags };
319}
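/* Illustration (added, not part of the original source): for a section with
 * start 0, global_size = {6, 8}, local_size = {3, -3}, local_start = {2, 1}
 * the loops above yield
 *   dims[1].global_stride = -1, dims[1].local_stride = 1,
 *   dims[0].global_stride =  8, dims[0].local_stride = 3,
 * and local_start_index = 19, i.e. the section enumerates
 *   19 18 17 / 27 26 25 / 35 34 33
 * (rows ascending, columns reversed because local_size[1] is negative), and
 * the sort flags record one ascending and one descending dimension. */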
320
321static int
322get_num_indices_from_local_sizes(size_t num_dimensions, const int local_size[num_dimensions])
323{
324 long long size = 1;
325
326 for (size_t i = 0; i < num_dimensions; ++i)
327 size *= abs(local_size[i]);
328 assert(size <= INT_MAX);
329
330 return (int)size;
331}
332
333Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions,
334 const Xt_int global_size[num_dimensions],
335 const int local_size[num_dimensions],
336 const Xt_int local_start[num_dimensions]) {
337
338 INSTR_DEF(instr,"xt_idxsection_new")
339 INSTR_START(instr);
340 // ensure that yaxt is initialized
341 assert(xt_initialized());
342
343 size_t num_dimensions_ = (size_t)num_dimensions;
344 int num_indices
345 = get_num_indices_from_local_sizes(num_dimensions_, local_size);
346 Xt_idxlist result;
347 if (num_indices > 0) {
348 Xt_idxsection idxsection
349 = xmalloc(sizeof (*idxsection)
350 + num_dimensions_ * sizeof (idxsection->dims[0]));
351
352 idxsection->global_start_index = start;
353 idxsection->ndim = num_dimensions;
354
355 idxsection->index_array_cache = NULL;
356
357 for (size_t i = 0; i < num_dimensions_; ++i) {
358 idxsection->dims[i].global_size = global_size[i];
359 idxsection->dims[i].local_size = local_size[i];
360 idxsection->dims[i].local_start = local_start[i];
361 }
362 Xt_idxlist_init(&idxsection->parent, &idxsection_vtable, num_indices);
363 struct section_aggregate sa =
364 setup_dims(start, num_dimensions_, idxsection->dims);
365 idxsection->local_start_index = sa.start;
366 idxsection->min_index_cache = sa.min;
367 idxsection->max_index_cache = sa.max;
368 idxsection->flags = sa.flags;
369 result = &idxsection->parent;
370 } else {
371 result = xt_idxempty_new();
372 }
373 INSTR_STOP(instr);
374
375 return result;
376}
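/* Usage sketch (added for illustration; it relies only on the constructor
 * defined above plus xt_idxlist_get_indices() and xt_idxlist_delete() from
 * the public xt_idxlist interface):
 *
 *   Xt_int global_size[2] = { 6, 8 };   // 6 rows of 8 columns, row-major
 *   int    local_size[2]  = { 3, 3 };   // a 3x3 block ...
 *   Xt_int local_start[2] = { 2, 1 };   // ... starting at row 2, column 1
 *   Xt_idxlist section
 *     = xt_idxsection_new(0, 2, global_size, local_size, local_start);
 *   // the section now holds the indices 17 18 19 / 25 26 27 / 33 34 35
 *   Xt_int idx[9];
 *   xt_idxlist_get_indices(section, idx);
 *   xt_idxlist_delete(section);
 */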
377
378static void
379idxsection_delete(Xt_idxlist data) {
380
381 if (data == NULL) return;
382
383 Xt_idxsection section = (Xt_idxsection)data;
384
385 free(section->index_array_cache);
386 free(section);
387}
388
389static size_t
390idxsection_get_pack_size(Xt_idxlist data, MPI_Comm comm) {
391
392 Xt_idxsection section = (Xt_idxsection)data;
393
394 int size_header, size_dim_descs, size_xt_int;
395
396 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
397 xt_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &size_xt_int), comm);
398 xt_mpi_call(MPI_Pack_size(section->ndim, dim_desc_dt,
399 comm, &size_dim_descs), comm);
400
401 return (size_t)size_header + (size_t)size_dim_descs
402 + (size_t)size_xt_int;
403}
404
405static void
406idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
407 int *position, MPI_Comm comm) {
408
409 INSTR_DEF(instr,"idxsection_pack")
410 INSTR_START(instr);
411
412 assert(data);
413 Xt_idxsection section = (Xt_idxsection)data;
414 int header[2] = { SECTION, section->ndim };
415 xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
416 buffer_size, position, comm), comm);
417 xt_mpi_call(MPI_Pack(&section->global_start_index, 1, Xt_int_dt, buffer,
418 buffer_size, position, comm), comm);
419 xt_mpi_call(MPI_Pack(section->dims, section->ndim, dim_desc_dt,
420 buffer, buffer_size, position, comm), comm);
421 INSTR_STOP(instr);
422}
423
424Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position,
425 MPI_Comm comm) {
426
427 INSTR_DEF(instr,"xt_idxsection_unpack")
428 INSTR_START(instr);
429
430 int ndim;
431 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, &ndim, 1, MPI_INT,
432 comm), comm);
433 Xt_idxsection section
434 = xmalloc(sizeof (*section) + (size_t)ndim * sizeof(section->dims[0]));
435 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
436 &section->global_start_index, 1, Xt_int_dt, comm),
437 comm);
438 assert(ndim > 0);
439 section->index_array_cache = NULL;
440 section->ndim = ndim;
441
442 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
443 section->dims, ndim, dim_desc_dt, comm),comm);
444
445 struct section_aggregate sa = setup_dims(
446 section->global_start_index, (size_t)ndim, section->dims);
447 section->local_start_index = sa.start;
448 section->min_index_cache = sa.min;
449 section->max_index_cache = sa.max;
450 section->flags = sa.flags;
451 int num_indices = idxsection_get_num_indices(section);
452 Xt_idxlist_init(&section->parent, &idxsection_vtable, num_indices);
453 INSTR_STOP(instr);
454 return (Xt_idxlist)section;
455}
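/* Note and sketch (added): idxsection_pack() above writes a two-int header
 * { SECTION, ndim } while xt_idxsection_unpack() reads only ndim; the
 * SECTION code is expected to be consumed by the generic unpack dispatcher.
 * A round trip therefore goes through the generic wrappers, roughly
 * (assuming the usual xt_idxlist.h / xt_idxlist_unpack.h interfaces):
 *
 *   size_t psize = xt_idxlist_get_pack_size(section, comm);
 *   void *buf = xmalloc(psize);
 *   int pos = 0;
 *   xt_idxlist_pack(section, buf, (int)psize, &pos, comm);
 *   pos = 0;
 *   Xt_idxlist copy = xt_idxlist_unpack(buf, (int)psize, &pos, comm);
 *   free(buf);
 */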
456
457Xt_idxlist
458xt_idxsection_get_intersection_with_other_idxlist(Xt_idxlist src_idxsection,
459 Xt_idxlist dst_idxlist,
460 Xt_config config)
461{
462 // intersection between an idxsection and a general idxlist:
463 //
464 // performance picture:
465 // - src_idxsection is treated as too big for elemental transforms/access
466 // - dst_idxlist is considered to be small enough (subdomain like) for elemental usage
467
468 INSTR_DEF(instr,"idxsection_get_intersection_with_other_idxlist")
469 INSTR_START(instr);
470
471 size_t num_dst_idx = (size_t)(xt_idxlist_get_num_indices(dst_idxlist));
472
473 Xt_int const* dst_idx = xt_idxlist_get_indices_const(dst_idxlist);
474
475 Xt_int const * sorted_dst_idx;
476 Xt_int * temp_dst_idx = NULL;
477 /* todo: use xt_idxlist_get_sorting instead */
478 for (size_t i = 1; i < num_dst_idx; ++i)
479 if (dst_idx[i] < dst_idx[i-1])
480 goto unsorted;
481 sorted_dst_idx = dst_idx;
482 goto get_pos;
483unsorted:
484 temp_dst_idx = xmalloc(num_dst_idx * sizeof(*temp_dst_idx));
485 memcpy(temp_dst_idx, dst_idx, num_dst_idx * sizeof(*temp_dst_idx));
486
487 config->sort_funcs->sort_xt_int(temp_dst_idx, num_dst_idx);
488 sorted_dst_idx = temp_dst_idx;
489get_pos:;
490 int *pos = xmalloc(num_dst_idx * sizeof(*pos));
491 size_t num_unmatched = idxsection_get_positions_of_indices(
492 src_idxsection, sorted_dst_idx, num_dst_idx, pos, false);
493 size_t num_inter_idx = num_dst_idx - num_unmatched;
494 Xt_idxlist result;
495 if (num_inter_idx) {
496 struct Xt_vec_alloc vec_alloc = xt_idxvec_alloc((int)num_inter_idx);
497 Xt_int *restrict inter_vec = vec_alloc.vector;
498
499 for(size_t i = 0, j = 0; i < num_dst_idx && j < num_inter_idx; i++)
500 if (pos[i] >= 0)
501 inter_vec[j++] = sorted_dst_idx[i];
502 result = xt_idxvec_congeal(vec_alloc);
503 } else {
504 result = xt_idxempty_new();
505 }
506
507 free(temp_dst_idx);
508 free(pos);
509
510 INSTR_STOP(instr);
511 return result;
512}
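/* Example (added): matching the destination indices {5, 3, 26} against the
 * 3x3 block of a 6x8 global space from the sketch above (indices 17..35),
 * the sorted selection {3, 5, 26} yields positions {-1, -1, 4}, so
 * num_unmatched is 2 and the resulting intersection is the index vector
 * {26}. */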
513
514static void
516
517#undef XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
518#define NUM_DIMENSIONS 1
520#undef NUM_DIMENSIONS
521#define NUM_DIMENSIONS 2
523#undef NUM_DIMENSIONS
524#define NUM_DIMENSIONS 3
526#undef NUM_DIMENSIONS
527#define NUM_DIMENSIONS 4
529#undef NUM_DIMENSIONS
531#undef NUM_DIMENSIONS
532
533
534Xt_idxlist
535xt_idxsection_get_idxstripes_intersection(Xt_idxlist src_idxlist,
536 Xt_idxlist dst_idxlist,
537 Xt_config config)
538{
539 INSTR_DEF(instr,"idxsection_get_idxstripes_intersection")
540 INSTR_START(instr);
541 (void)config;
542 assert(dst_idxlist->vtable->idxlist_pack_code == STRIPES);
543 Xt_idxsection idxsection = (Xt_idxsection)src_idxlist;
544 Xt_idxlist result;
545 switch (idxsection->ndim) {
546 case 1:
547 result
548 = xt_idxsection_get_idxstripes_intersection_1(
549 (Xt_idxsection)src_idxlist, dst_idxlist, config);
550 break;
551 case 2:
552 result
553 = xt_idxsection_get_idxstripes_intersection_2(
554 (Xt_idxsection)src_idxlist, dst_idxlist, config);
555 break;
556 case 3:
557 result
558 = xt_idxsection_get_idxstripes_intersection_3(
559 (Xt_idxsection)src_idxlist, dst_idxlist, config);
560 break;
561 case 4:
562 result
563 = xt_idxsection_get_idxstripes_intersection_4(
564 (Xt_idxsection)src_idxlist, dst_idxlist, config);
565 break;
566 default:
567 result
568 = xt_idxsection_get_idxstripes_intersection_(
569 (Xt_idxsection)src_idxlist, dst_idxlist, config);
570 }
571 INSTR_STOP(instr);
572 return result;
573}
574
575#define XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
576#define NUM_DIMENSIONS 1
578#undef NUM_DIMENSIONS
579#define NUM_DIMENSIONS 2
581#undef NUM_DIMENSIONS
582#define NUM_DIMENSIONS 3
584#undef NUM_DIMENSIONS
585#define NUM_DIMENSIONS 4
587#undef NUM_DIMENSIONS
589#undef XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
590
591Xt_idxlist
592xt_idxsection_get_idxstripes_r_intersection(Xt_idxlist src_idxlist,
593 Xt_idxlist dst_idxlist,
594 Xt_config config)
595{
596 INSTR_DEF(instr,"idxsection_get_idxstripes_r_intersection")
597 INSTR_START(instr);
598 (void)config;
599 assert(src_idxlist->vtable->idxlist_pack_code == STRIPES);
600 Xt_idxsection idxsection = (Xt_idxsection)dst_idxlist;
601 Xt_idxlist result;
602 switch (idxsection->ndim) {
603 case 1:
604 result
605 = xt_idxsection_get_idxstripes_intersection_sm_1(
606 (Xt_idxsection)dst_idxlist, src_idxlist, config);
607 break;
608 case 2:
609 result
610 = xt_idxsection_get_idxstripes_intersection_sm_2(
611 (Xt_idxsection)dst_idxlist, src_idxlist, config);
612 break;
613 case 3:
614 result
615 = xt_idxsection_get_idxstripes_intersection_sm_3(
616 (Xt_idxsection)dst_idxlist, src_idxlist, config);
617 break;
618 case 4:
619 result
620 = xt_idxsection_get_idxstripes_intersection_sm_4(
621 (Xt_idxsection)dst_idxlist, src_idxlist, config);
622 break;
623 default:
624 result
625 = xt_idxsection_get_idxstripes_intersection_sm_(
626 (Xt_idxsection)dst_idxlist, src_idxlist, config);
627 }
628 INSTR_STOP(instr);
629 return result;
630}
631
632Xt_idxlist
633xt_idxsection_get_intersection(Xt_idxlist idxlist_src,
634 Xt_idxlist idxlist_dst,
635 Xt_config config) {
636 INSTR_DEF(instr,"idxsection_get_intersection.part")
637
638  // both lists are index sections:
639
640 const struct Xt_idxsection_ *idxsection_src = (Xt_idxsection)idxlist_src,
641 *idxsection_dst = (Xt_idxsection)idxlist_dst;
642
643 if (idxsection_src->ndim != idxsection_dst->ndim ||
644 idxsection_src->global_start_index != idxsection_dst->global_start_index)
645 return xt_default_isect(idxlist_src, idxlist_dst, config);
646
647 size_t num_dimensions = (size_t)idxsection_src->ndim;
648  // the size of the first global dimension is irrelevant,
649  // the others have to be identical
650 for (size_t i = 1; i < num_dimensions; ++i)
651 if (XT_INT_ABS(idxsection_src->dims[i].global_size)
652        != XT_INT_ABS(idxsection_dst->dims[i].global_size))
653      return xt_default_isect(
654 idxlist_src, idxlist_dst, config);
655
656 INSTR_START(instr);
657
658 Xt_idxsection idxsection_intersection
659 = xmalloc(sizeof (*idxsection_intersection)
660 + num_dimensions * sizeof (idxsection_intersection->dims[0]));
661 idxsection_intersection->global_start_index
662 = idxsection_src->global_start_index;
663 idxsection_intersection->ndim = (int)num_dimensions;
664
665 // dimension information for the intersection
666 struct dim_desc *restrict intersection_dims = idxsection_intersection->dims;
667 const struct dim_desc *src_dims = idxsection_src->dims,
668 *dst_dims = idxsection_dst->dims;
669
670 // indices in an intersection have to be sorted in ascending order. therefore,
671 // local and global sizes of the intersection have to be positive
672
673 for (size_t i = 0; i < num_dimensions; ++i) {
674
675 Xt_int src_start, src_end, dst_start, dst_end, local_end;
676
677    // the start value is the minimum position in the current dimension (with positive
678 // size)
679 // in case the global size of src or dst is negative the start value has to be
680 // adjusted accordingly
681
682 if (src_dims[i].global_size >= 0)
683 src_start = src_dims[i].local_start;
684 else
685 src_start = (Xt_int)(-src_dims[i].global_size
686 - abs(src_dims[i].local_size)
687 - src_dims[i].local_start);
688
689 if (dst_dims[i].global_size >= 0)
690 dst_start = dst_dims[i].local_start;
691 else
692 dst_start = (Xt_int)(-dst_dims[i].global_size
693 - abs(dst_dims[i].local_size)
694 - dst_dims[i].local_start);
695
696 src_end = (Xt_int)(src_start
697 + (Xt_int)abs(src_dims[i].local_size));
698 dst_end = (Xt_int)(dst_start
699 + (Xt_int)abs(dst_dims[i].local_size));
700
701 intersection_dims[i].local_start = (src_start > dst_start)?src_start:dst_start;
702 local_end = (src_end > dst_end)?dst_end:src_end;
703
704 if (local_end <= intersection_dims[i].local_start)
705 goto isect_empty;
706
707 intersection_dims[i].local_size
708 = (int)(local_end - intersection_dims[i].local_start);
709 intersection_dims[i].global_size
710 = XT_INT_ABS(src_dims[i].global_size);
711 }
712
713 {
714 idxsection_intersection->index_array_cache = NULL;
715 int num_indices = idxsection_get_num_indices(idxsection_intersection);
716 Xt_idxlist_init(&idxsection_intersection->parent,
717 &idxsection_vtable, num_indices);
718 struct section_aggregate sa =
719 setup_dims(idxsection_intersection->global_start_index,
720 num_dimensions, idxsection_intersection->dims);
721 idxsection_intersection->local_start_index = sa.start;
722 idxsection_intersection->min_index_cache = sa.min;
723 idxsection_intersection->max_index_cache = sa.max;
724 idxsection_intersection->flags = sa.flags;
725 goto ret_idxlist;
726 }
727isect_empty:;
728 free(idxsection_intersection);
729 idxsection_intersection = (Xt_idxsection)xt_idxempty_new();
730ret_idxlist:;
731 INSTR_STOP(instr);
732 return (Xt_idxlist)idxsection_intersection;
733}
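/* Worked example (added): two 1-d sections over the same global dimension of
 * size 8 with start index 0, one with local_start 1 / local_size 4
 * (indices 1..4) and one with local_start 3 / local_size 4 (indices 3..6),
 * overlap in the range computed above as
 *   local_start = max(1, 3) = 3,  local_end = min(5, 7) = 5,
 * giving an intersection section of local_size 2, i.e. the indices {3, 4}. */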
734
735static Xt_idxlist
736idxsection_copy(Xt_idxlist idxlist) {
737
738 Xt_idxsection src = (Xt_idxsection)idxlist;
739
740 size_t num_dimensions = (size_t)src->ndim;
741
742 Xt_idxsection idxsection = xmalloc(
743 sizeof (*idxsection) + num_dimensions * sizeof (idxsection->dims[0]));
744 *idxsection = *src;
745 idxsection->index_array_cache = NULL;
746
747 memcpy(idxsection->dims, src->dims, num_dimensions * sizeof (src->dims[0]));
748
749 return (Xt_idxlist)idxsection;
750}
751
752static void
753idxsection_init_sorted_copy(Xt_idxsection orig, Xt_idxsection copy)
754{
755 size_t num_dimensions = (size_t)orig->ndim;
756
757 *copy = *orig;
758 copy->index_array_cache = NULL;
759
760 const struct dim_desc *restrict dd_orig = orig->dims;
761 struct dim_desc *restrict dd_dst = copy->dims;
762 for (size_t i = 0; i < num_dimensions; ++i) {
763 Xt_int gsize = dd_orig[i].global_size,
764 abs_gsize = XT_INT_ABS(gsize);
765 int lsize = dd_orig[i].local_size,
766 abs_lsize = abs(lsize);
767 dd_dst[i].global_size = abs_gsize;
768 dd_dst[i].local_size = abs_lsize;
769 dd_dst[i].local_start = gsize >= 0
770 ? dd_orig[i].local_start
771 : (Xt_int)(abs_gsize - abs_lsize - dd_orig[i].local_start);
772 }
773
774 struct section_aggregate sa = setup_dims(
775 orig->global_start_index, num_dimensions, copy->dims);
776 copy->local_start_index = sa.start;
777 copy->flags = sa.flags;
778 assert(copy->min_index_cache == sa.min
779 && copy->max_index_cache == sa.max);
780}
781
782
783static Xt_idxlist
784idxsection_sorted_copy(Xt_idxlist idxlist, Xt_config config) {
785
786 (void)config;
787 size_t num_dimensions = (size_t)((Xt_idxsection)idxlist)->ndim;
788 Xt_idxsection sorted_idxsection = xmalloc(
789 sizeof (*sorted_idxsection)
790 + num_dimensions * sizeof (sorted_idxsection->dims[0]));
791 idxsection_init_sorted_copy((Xt_idxsection)idxlist, sorted_idxsection);
792 return (Xt_idxlist)sorted_idxsection;
793}
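/* Example (added): for the section with global_size = {6, 8},
 * local_size = {3, -3}, local_start = {2, 1} used in the sketches above,
 * which enumerates 19 18 17 / 27 26 25 / 35 34 33, the sorted copy built
 * here has local_size = {3, 3} and yields the same index set in ascending
 * order: 17 18 19 / 25 26 27 / 33 34 35. */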
794
795static int
796idxsection_get_num_indices(Xt_idxsection section) {
797
798 long long size = 1;
799 size_t num_dimensions = (size_t)section->ndim;
800 for (size_t i = 0; i < num_dimensions; ++i)
801 size *= abs(section->dims[i].local_size);
802 assert(size <= INT_MAX);
803
804 return (int)size;
805}
806
807
808static int
809idxsection_get_indices_any(
810 Xt_int start_index, Xt_int *indices,
811 size_t num_dimensions, struct dim_desc dims[num_dimensions])
812{
813
814 int abs_local_size = abs(dims[0].local_size);
815
816 if (num_dimensions == 1)
817 {
818 if (dims[0].global_stride > 0)
819 for (int i = 0; i < abs_local_size; ++i)
820 indices[i] = (Xt_int)(start_index + i);
821 else
822 for (int i = 0; i < abs_local_size; ++i)
823 indices[i] = (Xt_int)(start_index - i);
824 return abs_local_size;
825 }
826 else
827 {
828 int indices_written = 0, overflow = 0;
829 assert(num_dimensions > 1);
830 for (int dim_ofs = 0; dim_ofs < abs_local_size; ++dim_ofs)
831 {
832      int indices_written_temp
833        = idxsection_get_indices_any(
834 (Xt_int)(start_index
835 + dim_ofs * dims[0].global_stride),
836 indices + indices_written,
837 num_dimensions - 1, dims + 1);
838 overflow |= (indices_written_temp > INT_MAX - indices_written);
839 indices_written += indices_written_temp;
840 }
841 assert(!overflow);
842 return indices_written;
843 }
844}
845
846static void
847idxsection_create_index_array_cache(Xt_idxsection section)
848{
849 size_t num_indices = (size_t)xt_idxlist_get_num_indices(&section->parent);
850 Xt_int *indices = section->index_array_cache
851    = xmalloc(num_indices * sizeof(*(section->index_array_cache)));
852  idxsection_get_indices_any(section->local_start_index, indices,
853 (size_t)section->ndim, section->dims);
854}
855
856
857static void
858idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices) {
859 INSTR_DEF(instr,"idxsection_get_indices")
860 INSTR_START(instr);
861 Xt_idxsection section = (Xt_idxsection)idxlist;
862
863  int num_indices
864    = xt_idxlist_get_num_indices(idxlist);
865
866 // if the indices are already computed
867 if (section->index_array_cache != NULL);
868  else
869    idxsection_create_index_array_cache(section);
870 memcpy(indices, section->index_array_cache,
871 (size_t)num_indices * sizeof(*indices));
872 INSTR_STOP(instr);
873}
874
875static Xt_int const*
876idxsection_get_indices_const(Xt_idxlist idxlist) {
877
878 Xt_idxsection idxsection = (Xt_idxsection)idxlist;
879  if (idxsection->index_array_cache == NULL)
880    idxsection_create_index_array_cache(idxsection);
881 return idxsection->index_array_cache;
882}
883
884static size_t
885idxsection_get_num_index_stripes_(Xt_idxsection section)
886{
887 size_t num_dimensions = (size_t)section->ndim;
888 struct dim_desc *restrict dims = section->dims;
889 size_t i = num_dimensions-1;
890 while (i != SIZE_MAX && dims[i].local_size == 1)
891 --i;
892 size_t nstripes = 1;
893 for (i -= (i != SIZE_MAX); i != SIZE_MAX; --i)
894 nstripes *= (size_t)abs(dims[i].local_size);
895 return nstripes;
896}
897
898static int
899idxsection_get_num_index_stripes(Xt_idxlist idxlist)
900{
901  size_t nstripes
902    = idxsection_get_num_index_stripes_((Xt_idxsection)idxlist);
903 return (int)nstripes;
904}
905
906static void
907idxsection_get_index_stripes(Xt_idxlist idxlist,
908 struct Xt_stripe *restrict stripes,
909 size_t num_stripes_alloc)
910{
911#ifdef NDEBUG
912 (void)num_stripes_alloc;
913#endif
914 INSTR_DEF(instr,"idxsection_get_index_stripes.part")
915
916 Xt_idxsection section = (Xt_idxsection)idxlist;
917
918 size_t num_dimensions = (size_t)section->ndim;
919 const struct dim_desc *restrict dims = section->dims;
920 /* find last dimension that has local_size > 1 */
921 size_t ln1dim = num_dimensions-1;
922 while (ln1dim != SIZE_MAX && dims[ln1dim].local_size == 1)
923 --ln1dim;
924 ln1dim += ln1dim == SIZE_MAX;
925
926 size_t nstripes = idxsection_get_num_index_stripes_(section);
927
928 assert(nstripes != 0);
929
930 INSTR_START(instr);
931
932 enum { curr_local_position_auto_size=16 };
933 Xt_int curr_local_position_auto[curr_local_position_auto_size];
934 Xt_int *restrict curr_local_position;
935 if (ln1dim <= curr_local_position_auto_size) {
936 curr_local_position = curr_local_position_auto;
937 for (size_t i = 0; i < ln1dim; ++i)
938 curr_local_position[i] = 0;
939 } else
940 curr_local_position
941 = xcalloc(ln1dim, sizeof(*curr_local_position));
942
943 for (size_t i = 0; i < nstripes; ++i) {
944
945 stripes[i].start = section->local_start_index;
946 stripes[i].nstrides = abs(dims[ln1dim].local_size);
947 stripes[i].stride = dims[ln1dim].global_stride;
948
949 for (size_t j = 0; j < ln1dim; ++j)
950 stripes[i].start = (Xt_int)(stripes[i].start
951 + curr_local_position[j]
952 * dims[j].global_stride);
953
954 for (size_t j = ln1dim-1; j != (size_t)-1; --j)
955 if (curr_local_position[j] < abs(dims[j].local_size) - 1) {
956 curr_local_position[j]++;
957 break;
958 } else
959 curr_local_position[j] = 0;
960 }
961 assert(num_stripes_alloc >= nstripes);
962 if (curr_local_position != curr_local_position_auto)
963 free(curr_local_position);
964
965 INSTR_STOP(instr);
966}
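/* Example (added): for the 3x3 block of a 6x8 global space from the sketch
 * after xt_idxsection_new() (indices 17 18 19 / 25 26 27 / 33 34 35),
 * ln1dim selects the column dimension and the loop above emits three
 * stripes:
 *   { .start = 17, .stride = 1, .nstrides = 3 }
 *   { .start = 25, .stride = 1, .nstrides = 3 }
 *   { .start = 33, .stride = 1, .nstrides = 3 }
 */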
967
968static int
969idxsection_get_index_at_position(Xt_idxlist idxlist, int position,
970 Xt_int * index) {
971
972 Xt_idxsection section = (Xt_idxsection)idxlist;
973
974 if (position < 0) return 1;
975
976 size_t num_dimensions = (size_t)section->ndim;
977 Xt_int temp_index = section->local_start_index;
978
979 const struct dim_desc *dims = section->dims;
980
981 for (size_t dim = 0; dim < num_dimensions; ++dim) {
982
983 div_t qr = div(position, dims[dim].local_stride);
984 int curr_local_position = qr.quot;
985
986 if (curr_local_position >= abs(dims[dim].local_size))
987 return 1;
988
989 temp_index = (Xt_int)(temp_index
990 + curr_local_position
991 * dims[dim].global_stride);
992 position = qr.rem;
993 }
994
995 *index = temp_index;
996
997 return 0;
998}
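/* Worked example (added), again for the 3x3 block of a 6x8 global space
 * (local_start_index 17, local strides {3, 1}, global strides {8, 1}):
 * position 4 decomposes as div(4, 3) = (1, 1) in the row dimension and
 * div(1, 1) = (1, 0) in the column dimension, so the returned index is
 * 17 + 1*8 + 1*1 = 26. */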
999
1000static int
1001idxsection_get_position_of_index(Xt_idxlist idxlist, Xt_int index,
1002 int * position) {
1003
1004 INSTR_DEF(instr,"idxsection_get_position_of_index.part")
1005
1006 Xt_idxsection section = (Xt_idxsection)idxlist;
1007 *position = -1;
1008
1009 if (index < section->min_index_cache || index > section->max_index_cache)
1010 return 1;
1011
1012 int retval = 1;
1013
1014 INSTR_START(instr);
1015
1016 // normalise index (global start of indices at 0)
1017 index = (Xt_int)(index - section->global_start_index);
1018
1019 int temp_position = 0;
1020 size_t num_dimensions = (size_t)section->ndim;
1021 const struct dim_desc *dims = section->dims;
1022 for (size_t i = 0; i < num_dimensions; ++i) {
1023
1024 XT_INT_DIV_T qr = Xt_div(index, dims[i].agsmd,
1025 XT_INT_ABS(dims[i].global_stride));
1026 Xt_int curr_global_position = (Xt_int)qr.quot;
1027
1028 // in case the global size is negative, we have to adjust the global position,
1029 // because the ordering of indices in this dimension is inverted
1030 if (dims[i].global_size < 0)
1031 curr_global_position
1032 = (Xt_int)(-dims[i].global_size - curr_global_position - 1);
1033
1034 index = (Xt_int)qr.rem;
1035
1036 if (curr_global_position < dims[i].local_start)
1037 goto fun_exit;
1038
1039 int curr_local_position
1040 = (int)(curr_global_position - dims[i].local_start);
1041
1042 int abs_local_size = abs(dims[i].local_size);
1043 // same adjustment for local position as for the global one before
1044 if (dims[i].local_size < 0)
1045 curr_local_position = abs_local_size - curr_local_position - 1;
1046
1047 if (curr_local_position >= abs_local_size)
1048 goto fun_exit;
1049
1050 temp_position += (int)curr_local_position * dims[i].local_stride;
1051 }
1052
1053 *position = temp_position;
1054
1055 retval = 0;
1056
1057 fun_exit: ;
1058 INSTR_STOP(instr);
1059 return retval;
1060}
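/* Worked example (added), the inverse of the one above: for index 26 in the
 * 3x3 block of a 6x8 global space (global_start_index 0, local_start {2, 1}),
 * the row dimension gives 26 / 8 = 3 rem 2 and local row 3 - 2 = 1,
 * contributing 1*3 to the position; the column dimension gives 2 / 1 = 2 and
 * local column 2 - 1 = 1, contributing 1*1; the reported position is 4. */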
1061
1062static size_t
1063idxsection_get_positions_of_indices_v1(Xt_idxlist body_idxlist,
1064 const Xt_int selection_idx[],
1065 size_t num_selection,
1066 int positions[],
1067 int single_match_only) {
1068
1069 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v1.part")
1070 if (num_selection == 1)
1071 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1072 *selection_idx,
1073 positions));
1074
1075 size_t num_unmatched = 0;
1076
1077 if (!single_match_only) {
1078 // this is the easy case, we don't care about multiple uses of the same position
1079 for (size_t i = 0; i < num_selection; ++i)
1080 num_unmatched
1081 += (size_t)(idxsection_get_position_of_index(body_idxlist,
1082 selection_idx[i],
1083 &positions[i]));
1084 return num_unmatched;
1085 }
1086
1087 INSTR_START(instr);
1088
1089 for (size_t i = 1; i < num_selection; ++i)
1090 if (selection_idx[i] < selection_idx[i-1])
1091 goto unsorted;
1092
1093 // indices are sorted
1094 {
1095 // we need an index that is different from the current one
1096 Xt_int prev_index = (Xt_int)(selection_idx[0] - 1);
1097
1098 for (size_t i = 0; i < num_selection; i++) {
1099
1100 Xt_int curr_index = selection_idx[i];
1101
1102 if (prev_index != curr_index) {
1103
1104 num_unmatched
1105 += (size_t)(idxsection_get_position_of_index(body_idxlist, curr_index,
1106 positions + i));
1107 prev_index = curr_index;
1108
1109 } else {
1110 // for an idxsection there is a unique map from indices to positions,
1111 // we got the same index again, so there is no match left:
1112 positions[i] = -1;
1113 num_unmatched++;
1114 }
1115 }
1116 }
1117 goto end;
1118 // indices are not sorted
1119unsorted:
1120 {
1121 // the remaining (single_match_only) case follows:
1122 idxpos_type *v = xmalloc(num_selection * sizeof(*v) );
1123 for (size_t i = 0; i < num_selection; i++) {
1124 v[i].idx = selection_idx[i];
1125 v[i].pos = (int)i;
1126 }
1127 xt_default_config.sort_funcs->sort_idxpos(v, num_selection);
1128 Xt_int last_jx = (Xt_int)(v[0].idx - 1); // any index that does not equal v[0].idx will do
1129 for (size_t i = 0; i < num_selection; i++) {
1130 int j = v[i].pos;
1131 Xt_int jx = v[i].idx;
1132 if (jx != last_jx) {
1133 num_unmatched
1134 += (size_t)(idxsection_get_position_of_index(body_idxlist, jx,
1135 &positions[j]));
1136 last_jx = jx;
1137 } else {
1138 // for an idxsection there is a unique map from indices to positions,
1139 // we got the same index again, so there is no match left:
1140 positions[j] = -1;
1141 num_unmatched++;
1142 }
1143 }
1144 free(v);
1145 }
1146end:
1147 INSTR_STOP(instr);
1148 return num_unmatched;
1149}
1150
1151static size_t
1152idxsection_get_positions_of_indices_v2(Xt_idxlist body_idxlist,
1153 const Xt_int selection_idx[],
1154 size_t num_selection, int positions[],
1155 int single_match_only) {
1156
1157 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v2.part")
1158
1159 if (num_selection == 1)
1160 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1161 *selection_idx, positions));
1162
1163 INSTR_START(instr);
1164
1165 Xt_int * temp_selection_idx = NULL;
1166 const Xt_int *restrict sorted_selection_idx;
1167 int * selection_pos = NULL;
1168
1169 for (size_t i = 1; i < num_selection; ++i)
1170 if (selection_idx[i] < selection_idx[i-1])
1171 goto unsorted;
1172
1173 sorted_selection_idx = selection_idx;
1174 goto sorted;
1175unsorted:
1176 // the indices are not sorted
1177 temp_selection_idx
1178 = xmalloc(num_selection * sizeof(*temp_selection_idx));
1179 memcpy(temp_selection_idx, selection_idx,
1180 num_selection * sizeof(*temp_selection_idx));
1181 selection_pos = xmalloc(num_selection * sizeof(*selection_pos));
1182
1183  xt_assign_id_map_int(num_selection, selection_pos, 0);
1184  xt_default_config.sort_funcs->sort_xt_int_permutation(
1185 temp_selection_idx, num_selection, selection_pos);
1186 sorted_selection_idx = temp_selection_idx;
1187
1188sorted: ;
1189 const Xt_int *body_indices = idxsection_get_indices_const(body_idxlist);
1190 size_t num_body_indices = (size_t)xt_idxlist_get_num_indices(body_idxlist);
1191
1192 // Xt_int last_idx = sorted_selection_idx[0] - 1;
1193 //
1194 // for (i = 0, j = 0; i < num_selection && j < num_body_indices; ++i) {
1195 //
1196 // while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1197 //
1198 // if (j >= num_body_indices) break;
1199 //
1200 // if (!single_match_only)
1201 // positions[(selection_pos == NULL)?i:selection_pos[i]] =
1202 // (body_indices[j] == sorted_selection_idx[i])?j:-1;
1203 // else
1204 // positions[selection_pos[i]] =
1205 // ((last_idx == sorted_selection_idx[i]) ||
1206 // (body_indices[j] != sorted_selection_idx[i]))?-1:j;
1207 // }
1208
1209 // the following loops are an unrolled version of the one above
1210 size_t i = 0;
1211 if (!single_match_only) {
1212
1213 if (selection_pos == NULL) {
1214 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1215
1216 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1217
1218 if (j >= num_body_indices) break;
1219
1220 positions[i] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
1221 }
1222 } else {
1223 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1224
1225 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1226
1227 if (j >= num_body_indices) break;
1228
1229 positions[selection_pos[i]] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
1230 }
1231 }
1232 } else {
1233
1234 Xt_int last_idx = (Xt_int)(sorted_selection_idx[0] - 1);
1235
1236 if (selection_pos == NULL) {
1237 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1238
1239 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1240
1241 if (j >= num_body_indices) break;
1242
1243 positions[i] = ((last_idx == sorted_selection_idx[i]) ||
1244 (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1245
1246 last_idx = sorted_selection_idx[i];
1247 }
1248 } else {
1249 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1250
1251 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1252
1253 if (j >= num_body_indices) break;
1254
1255 positions[selection_pos[i]] = ((last_idx == sorted_selection_idx[i]) ||
1256 (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1257
1258 last_idx = sorted_selection_idx[i];
1259 }
1260 }
1261 }
1262
1263 // process indices that were not handled by the loop above
1264 if (selection_pos == NULL)
1265 for (; i < num_selection; ++i)
1266 positions[i] = -1;
1267 else
1268 for (; i < num_selection; ++i)
1269 positions[selection_pos[i]] = -1;
1270
1271 free(temp_selection_idx);
1272 free(selection_pos);
1273
1274 size_t num_unmatched = 0;
1275
1276  // count the number of unmatched indices
1277 for (size_t j = 0; j < num_selection; ++j)
1278 num_unmatched += positions[j] == -1;
1279
1280 INSTR_STOP(instr);
1281 return num_unmatched;
1282}
1283
1284static size_t
1285idxsection_get_positions_of_indices_recursive(Xt_int index_offset,
1286 int position_offset,
1287 const Xt_int indices[],
1288 size_t num_indices,
1289 int positions[],
1290 int ndim,
1291 struct dim_desc dims[ndim])
1292{
1293 size_t num_processed = 0;
1294
1295 Xt_int abs_global_size = XT_INT_ABS(dims[0].global_size);
1296 int abs_local_size = abs(dims[0].local_size);
1297 Xt_int abs_global_stride = XT_INT_ABS(dims[0].global_stride);
1298 Xt_int local_start = dims[0].local_start;
1299 // we want to work on ascending indices in the lowest dimension -> have to
1300 // adjust in case of negative global size
1301 if (dims[0].global_size < 0)
1302 local_start = (Xt_int)(abs_global_size - local_start - abs_local_size);
1303
1304 Xt_int min_index
1305 = (Xt_int)(index_offset + local_start * abs_global_stride);
1306
1307 // set all indices that are smaller than the minimum to "not found"
1308 while ((num_processed < num_indices)
1309 && (indices[num_processed] < min_index))
1310 positions[num_processed++] = -1;
1311
1312 if (ndim == 1)
1313 {
1314
1315 // if either the local or the global dimension is negative
1316 if (dims[0].global_stride < 0) {
1317
1318      // for as long as we stay within the local section of the current
1319 // global dimension
1320 Xt_int curr_position;
1321 while ((num_processed < num_indices) &&
1322 ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1323 abs_local_size)) {
1324
1325 positions[num_processed++] = position_offset
1326 + (int)(abs_local_size - curr_position - 1);
1327 }
1328 } else { // if the local and global dimension are both negative or positive
1329
1330      // for as long as we stay within the local section of the current
1331 // global dimension
1332 Xt_int curr_position;
1333 while ((num_processed < num_indices) &&
1334 ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1335 abs_local_size)) {
1336
1337 positions[num_processed++] = position_offset + (int)curr_position;
1338 }
1339 }
1340
1341 // for all remaining indices that are in the current global dimension but not
1342 // within the local section
1343 while ((num_processed < num_indices) &&
1344 (indices[num_processed] < index_offset + abs_global_size))
1345 positions[num_processed++] = -1;
1346
1347 } else {
1348
1349 assert(ndim > 1);
1350
1351 // while there are indices that have not yet been processed
1352 while (num_processed < num_indices) {
1353
1354 // compute global position of the smallest index that has not yet been processed
1355 XT_INT_DIV_T qr
1356 = Xt_div((Xt_int)(indices[num_processed] - index_offset),
1357 dims[0].agsmd, abs_global_stride);
1358 Xt_int curr_global_position = (Xt_int)qr.quot;
1359
1360 // if the position is outside of the range of the current dimension
1361 Xt_int abs_local_position
1362 = (Xt_int)(curr_global_position - local_start);
1363 if (abs_local_position >= abs_local_size)
1364 break;
1365
1366 int curr_local_position
1367 = dims[0].global_stride < 0
1368 // if either the local or the global dimension is negative
1369 ? (int)(abs_local_size - abs_local_position - 1)
1370 // if the local and global dimension are both negative or positive
1371 : (int)abs_local_position;
1372 Xt_int curr_index_offset
1373 = (Xt_int)(index_offset + curr_global_position * abs_global_stride);
1374 /* curr_local_position * dims[0].local_stride <= INT_MAX
1375 * because abs_local_position < abs_local_size and total mesh
1376 * size is less than INT_MAX */
1377 int position_offset_ = position_offset + curr_local_position * dims[0].local_stride;
1378
1379      num_processed += idxsection_get_positions_of_indices_recursive(
1380 curr_index_offset, position_offset_, indices + num_processed,
1381 num_indices - num_processed, positions + num_processed, ndim-1,
1382 dims + 1);
1383 }
1384 }
1385
1386 return num_processed;
1387}
1388
1389static size_t
1390idxsection_get_positions_of_indices_v3(Xt_idxlist body_idxlist,
1391 const Xt_int *restrict selection_idx,
1392 size_t num_selection,
1393 int *restrict positions,
1394 int single_match_only) {
1395
1396 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v3.part")
1397 INSTR_DEF(instr2,"idxsection_get_positions_of_indices_recursive")
1398
1399 Xt_idxsection section = (Xt_idxsection)body_idxlist;
1400
1401 if (num_selection == 1)
1402 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1403 *selection_idx,
1404 positions));
1405
1406 INSTR_START(instr);
1407
1408 const Xt_int * restrict sorted_selection_idx;
1409 Xt_int *temp_selection_idx = NULL;
1410 int *sorted_positions;
1411 int *selection_pos = NULL;
1412
1413 for (size_t i = 1; i < num_selection; ++i)
1414 if (selection_idx[i] < selection_idx[i-1])
1415 goto unsorted_selection;
1416
1417 sorted_selection_idx = selection_idx;
1418 sorted_positions = positions;
1419 goto sorted_selection;
1420 // if the selection is not sorted
1421unsorted_selection:
1422 temp_selection_idx
1423 = xmalloc(num_selection * sizeof(*temp_selection_idx));
1424 {
1425 size_t num_sp_alloc = num_selection;
1426#if defined _CRAYC && _RELEASE_MAJOR < 9
1427 num_sp_alloc = (num_sp_alloc + _MAXVL_32 - 1) & ~(_MAXVL_32 - 1);
1428#endif
1429 size_t total_alloc = num_sp_alloc + num_selection;
1430 sorted_positions
1431 = xmalloc(total_alloc * sizeof(*sorted_positions));
1432 selection_pos = sorted_positions + num_sp_alloc;
1433 }
1434 memcpy(temp_selection_idx, selection_idx,
1435 num_selection * sizeof(*temp_selection_idx));
1436
1437  xt_assign_id_map_int(num_selection, selection_pos, 0);
1438  xt_default_config.sort_funcs->sort_xt_int_permutation(
1439 temp_selection_idx, num_selection, selection_pos);
1440 sorted_selection_idx = temp_selection_idx;
1441sorted_selection:
1442
1443 INSTR_START(instr2);
1444
1445  size_t num_processed
1446    = idxsection_get_positions_of_indices_recursive(
1447 section->global_start_index,
1448 0, sorted_selection_idx, num_selection,
1449 sorted_positions, section->ndim,
1450 section->dims);
1451
1452 INSTR_STOP(instr2);
1453
1454 // set remaining index positions to -1
1455 for (size_t i = num_processed; i < num_selection; ++i)
1456 sorted_positions[i] = -1;
1457
1458 // apply single match only rule
1459 if (single_match_only)
1460 for (size_t i = 1; i < num_processed; ++i)
1461 if (sorted_selection_idx[i] == sorted_selection_idx[i-1])
1462 sorted_positions[i] = -1;
1463
1464 // convert positions if unsorted
1465 if (sorted_selection_idx != selection_idx) {
1466
1467 for (size_t i = 0; i < num_selection; ++i)
1468 positions[selection_pos[i]] = sorted_positions[i];
1469
1470 free(sorted_positions);
1471 free(temp_selection_idx);
1472 }
1473
1474  // count the number of unmatched indices
1475 size_t num_unmatched = num_selection - num_processed;
1476
1477 for (size_t i = 0; i < num_processed; ++i)
1478 num_unmatched += positions[i] == -1;
1479
1480 INSTR_STOP(instr);
1481
1482 return num_unmatched;
1483}
1484
1485static size_t
1486idxsection_get_positions_of_indices(Xt_idxlist body_idxlist,
1487 const Xt_int *selection_idx,
1488 size_t num_selection, int * positions,
1489 int single_match_only) {
1490
1491 INSTR_DEF(instr,"idxsection_get_positions_of_indices")
1492 Xt_idxsection section = (Xt_idxsection)body_idxlist;
1493 size_t retval = 0, num_section_indices;
1494
1495
1496 INSTR_START(instr);
1497
1498 // if any dimension of the body index list is negative we have to use the
1499  // v3 version, because the other versions cannot handle negative sizes
1500 if ((section->flags & sort_mask) != sort_asc)
1501 retval = idxsection_get_positions_of_indices_v3(body_idxlist, selection_idx,
1502 num_selection, positions,
1503 single_match_only);
1504 /*
1505 * if the indices are already cached or (if the caching would not
1506 * consume too much memory and the number of selection indices are
1507 * sufficient to justify the use of cached indices)
1508 */
1509 else if ((section->index_array_cache != NULL) ||
1510 (((num_section_indices
1511 = (size_t)xt_idxlist_get_num_indices(body_idxlist))
1512 * sizeof(Xt_int)
1513 <= (size_t)128 * 1024U * 1024U)
1514 && (num_section_indices <= 1000 * num_selection)))
1515 retval = idxsection_get_positions_of_indices_v2(body_idxlist, selection_idx,
1516 num_selection, positions,
1517 single_match_only);
1518 else
1519 retval = idxsection_get_positions_of_indices_v1(body_idxlist, selection_idx,
1520 num_selection, positions,
1521 single_match_only);
1522
1523 INSTR_STOP(instr);
1524 return retval;
1525}
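/* Example (added) of the single_match_only semantics shared by the three
 * variants dispatched above: querying the 3x3 block of a 6x8 global space
 * (indices 17..35 as in the earlier sketches) with
 * selection_idx = {26, 26, 99} and single_match_only != 0 yields
 * positions = {4, -1, -1} and a return value of 2, because the duplicate 26
 * may only be matched once and 99 lies outside the section. */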
1526
1527static int
1528idxsection_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index,
1529 int * position, int offset) {
1530
1531 int temp_position;
1532 // we make use of the uniqueness of the index-to-position relation:
1533 if (idxsection_get_position_of_index(idxlist, index, &temp_position)
1534 || temp_position < offset)
1535 temp_position = -1;
1536 *position = temp_position;
1537 return temp_position == -1;
1538}
1539
1540static Xt_int
1541idxsection_get_min_index(Xt_idxlist idxlist) {
1542
1543 Xt_idxsection section = (Xt_idxsection)idxlist;
1544 return section->min_index_cache;
1545}
1546
1547static Xt_int
1548idxsection_get_max_index(Xt_idxlist idxlist) {
1549
1550 Xt_idxsection section = (Xt_idxsection)idxlist;
1551 return section->max_index_cache;
1552}
1553
1554static int
1555idxsection_get_sorting(Xt_idxlist idxlist)
1556{
1557 unsigned sort_flags = (((Xt_idxsection)idxlist)->flags) & sort_mask;
1558 return (int)sort_flags-(sort_flags < 3);
1559}
1560
1561/*
1562 * Local Variables:
1563 * c-basic-offset: 2
1564 * coding: utf-8
1565 * indent-tabs-mode: nil
1566 * show-trailing-whitespace: t
1567 * require-trailing-newline: t
1568 * End:
1569 */