Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_fft3d.h
1/*
2 -- heFFTe --
3 Univ. of Tennessee, Knoxville
4 @date
5*/
6
7#ifndef HEFFTE_FFT3D_H
8#define HEFFTE_FFT3D_H
9
10#include "heffte_compute_transform.h"
11
46namespace heffte {
47
58template<typename scalar_type> struct fft_output{
60 using type = scalar_type;
61};
66template<> struct fft_output<float>{
68 using type = std::complex<float>;
69};
74template<> struct fft_output<double>{
76 using type = std::complex<double>;
77};
78
85template<typename scalar_type, typename backend_tag, typename = void>
91template<typename scalar_type, typename backend_tag>
92struct transform_output<scalar_type, backend_tag, typename std::enable_if<backend::uses_fft_types<backend_tag>::value>::type>{
95};
100template<typename scalar_type, typename backend_tag>
101struct transform_output<scalar_type, backend_tag, typename std::enable_if<not backend::uses_fft_types<backend_tag>::value>::type>{
103 using type = scalar_type;
104};
105
112template<typename scalar_type, typename backend_tag, typename = void>
118template<typename scalar_type, typename backend_tag>
119struct transform_real_type<scalar_type, backend_tag, typename std::enable_if<backend::uses_fft_types<backend_tag>::value>::type>{
122};
127template<typename scalar_type, typename backend_tag>
128struct transform_real_type<scalar_type, backend_tag, typename std::enable_if<not backend::uses_fft_types<backend_tag>::value>::type>{
130 using type = scalar_type;
131};
132
133
140enum class scale{
142 none,
144 full,
147};
148
239template<typename backend_tag, typename index = int>
240class fft3d : public backend::device_instance<typename backend::buffer_traits<backend_tag>::location>{
241public:
251 template<typename T> using buffer_container = typename backend::buffer_traits<backend_tag>::template container<T>;
256
261
270 fft3d(box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
271 plan_options const options = default_options<backend_tag>()) :
272 fft3d(plan_operations(mpi::gather_boxes(inbox, outbox, comm), -1, set_options<backend_tag>(options), mpi::comm_rank(comm)), comm){
273 static_assert(backend::is_enabled<backend_tag>::value, "The requested backend is invalid or has not been enabled.");
274 }
293 box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
294 plan_options const options = default_options<backend_tag>()) :
295 fft3d(gpu_stream, plan_operations(mpi::gather_boxes(inbox, outbox, comm), -1, set_options<backend_tag>(options), mpi::comm_rank(comm)), comm){
296 static_assert(backend::is_enabled<backend_tag>::value, "The requested backend is invalid or has not been enabled.");
297 }
298
299
301 fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
302 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
303 MPI_Comm const comm,
304 bool use_reorder, int algorithm, bool use_pencils)
305 : fft3d(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
306 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
307 comm,
308 plan_options(use_reorder, static_cast<reshape_algorithm>(algorithm), use_pencils))
309 {}
311 fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
312 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
313 MPI_Comm const comm)
314 : fft3d(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
315 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
316 comm)
317 {}
319 fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2,
320 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2,
321 MPI_Comm const comm)
322 : fft3d(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), comm)
323 {}
324
326 long long size_inbox() const{ return pinbox->count(); }
328 long long size_outbox() const{ return poutbox->count(); }
330 box3d<index> inbox() const{ return *pinbox; }
332 box3d<index> outbox() const{ return *poutbox; }
333
353 template<typename input_type, typename output_type>
354 void forward(input_type const input[], output_type output[], scale scaling = scale::none) const{
356 "Using either an unknown complex type or an incompatible pair of types!");
357
358 auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->stream(), size_workspace());
359 forward(convert_to_standard(input), convert_to_standard(output), workspace.data(), scaling);
360 }
361
374 template<typename input_type, typename output_type>
375 void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling = scale::none) const{
377 "Using either an unknown complex type or an incompatible pair of types!");
378
379 compute_transform<location_tag, index>(this->stream(), 1, convert_to_standard(input), convert_to_standard(output),
380 convert_to_standard(workspace),
381 executor_buffer_offset, size_comm_buffers(), forward_shaper,
382 forward_executors(), direction::forward);
383 apply_scale(1, direction::forward, scaling, convert_to_standard(output));
384 }
391 template<typename input_type, typename output_type>
392 void forward(int const batch_size, input_type const input[], output_type output[],
393 output_type workspace[], scale scaling = scale::none) const{
395 "Using either an unknown complex type or an incompatible pair of types!");
396
397 compute_transform<location_tag, index>(this->stream(), batch_size, convert_to_standard(input), convert_to_standard(output),
398 convert_to_standard(workspace),
399 executor_buffer_offset, size_comm_buffers(), forward_shaper,
400 forward_executors(), direction::forward);
401 apply_scale(batch_size, direction::forward, scaling, convert_to_standard(output));
402 }
406 template<typename input_type, typename output_type>
407 void forward(int const batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{
409 "Using either an unknown complex type or an incompatible pair of types!");
410
411 auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->stream(), batch_size * size_workspace());
412
413 forward(batch_size, input, output, workspace.data(), scaling);
414 }
415
440 template<typename input_type>
442 if (input.size() < static_cast<size_t>(size_inbox()))
443 throw std::invalid_argument("The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
444 auto output = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->stream(), size_outbox());
445 forward(input.data(), output.data(), scaling);
446 return output;
447 }
448
468 template<typename input_type, typename output_type>
469 void backward(input_type const input[], output_type output[], scale scaling = scale::none) const{
471 "Using either an unknown complex type or an incompatible pair of types!");
472
473 auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->stream(), size_workspace());
474 backward(input, output, workspace.data(), scaling);
475 }
476
480 template<typename input_type, typename output_type>
481 void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling = scale::none) const{
483 "Using either an unknown complex type or an incompatible pair of types!");
484
485 compute_transform<location_tag, index>(this->stream(), 1, convert_to_standard(input), convert_to_standard(output),
486 convert_to_standard(workspace),
487 executor_buffer_offset, size_comm_buffers(), backward_shaper,
488 backward_executors(), direction::backward);
489 apply_scale(1, direction::backward, scaling, convert_to_standard(output));
490 }
494 template<typename input_type, typename output_type>
495 void backward(int const batch_size, input_type const input[], output_type output[],
496 input_type workspace[], scale scaling = scale::none) const{
498 "Using either an unknown complex type or an incompatible pair of types!");
499
500 compute_transform<location_tag, index>(this->stream(), batch_size, convert_to_standard(input), convert_to_standard(output),
501 convert_to_standard(workspace),
502 executor_buffer_offset, size_comm_buffers(), backward_shaper,
503 backward_executors(), direction::backward);
504 apply_scale(batch_size, direction::backward, scaling, convert_to_standard(output));
505 }
509 template<typename input_type, typename output_type>
510 void backward(int const batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{
512 "Using either an unknown complex type or an incompatible pair of types!");
513
514 auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->stream(), batch_size * size_workspace());
515 backward(batch_size, input, output, workspace.data(), scaling);
516 }
517
521 template<typename scalar_type>
524 "Either calling backward() with non-complex input or using an unknown complex type.");
525 if (input.size() < static_cast<size_t>(size_outbox()))
526 throw std::invalid_argument("The input vector is smaller than size_outbox(), i.e., not enough entries provided to fill the outbox.");
527 auto result = make_buffer_container<scalar_type>(this->stream(), size_inbox());
528 backward(input.data(), result.data(), scaling);
529 return result;
530 }
531
535 template<typename scalar_type>
538 "Either calling backward() with non-complex input or using an unknown complex type.");
539 auto result = make_buffer_container<typename transform_real_type<scalar_type, backend_tag>::type>(this->stream(), size_inbox());
540 backward(input.data(), result.data(), scaling);
541 return result;
542 }
543
545 double get_scale_factor(scale scaling) const{
546 return (scaling == scale::symmetric) ? std::sqrt(scale_factor) : scale_factor;
547 }
548
550 size_t size_workspace() const{ return size_buffer_work; }
552 size_t size_comm_buffers() const{ return comm_buffer_offset; }
553
554private:
574 fft3d(logic_plan3d<index> const &plan, MPI_Comm const comm) :
575 backend::device_instance<location_tag>(),
576 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
577 scale_factor(1.0 / static_cast<double>(plan.index_count))
578 #ifdef Heffte_ENABLE_MAGMA
579 , hmagma(this->stream())
580 #endif
581 {
582 setup(plan, comm);
583 }
584
587 logic_plan3d<index> const &plan, MPI_Comm const comm) :
588 backend::device_instance<location_tag>(gpu_stream),
589 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
590 scale_factor(1.0 / static_cast<double>(plan.index_count))
591 #ifdef Heffte_ENABLE_MAGMA
592 , hmagma(this->stream())
593 #endif
594 {
595 setup(plan, comm);
596 }
597
599 void setup(logic_plan3d<index> const &plan, MPI_Comm const comm){
600 for(int i=0; i<4; i++){
601 forward_shaper[i] = make_reshape3d<backend_tag>(this->stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
602 backward_shaper[3-i] = make_reshape3d<backend_tag>(this->stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
603 }
604
605 int const my_rank = plan.mpi_rank;
606
607 if (has_executor3d<backend_tag>() and not forward_shaper[1] and not forward_shaper[2]){
608 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank]);
609 }else if (has_executor2d<backend_tag>() and (not forward_shaper[1] or not forward_shaper[2])){
610 if (not forward_shaper[1]){
611 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank],
612 plan.fft_direction[0], plan.fft_direction[1]);
613 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
614 }else{
615 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
616 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank],
617 plan.fft_direction[1], plan.fft_direction[2]);
618 }
619 }else{
620 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
621 executors[1] = make_executor<backend_tag>(this->stream(), plan.out_shape[1][my_rank], plan.fft_direction[1]);
622 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
623 }
624
625 size_t executor_workspace_size = get_max_work_size(executors);
626 comm_buffer_offset = std::max(get_workspace_size(forward_shaper), get_workspace_size(backward_shaper));
627 // the last junk of (fft0->box_size() + 1) / 2 is used only when doing complex-to-real backward transform
628 // maybe update the API to call for different size buffers for different complex/real types
629 int last_chunk = (executors[0] == nullptr) ? 0 : (((backward_shaper[3]) ? (executors[0]->box_size() + 1) / 2 : 0));
630 size_buffer_work = comm_buffer_offset + executor_workspace_size
631 + get_max_box_size(executors)
632 + last_chunk;
633 executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
634
635 if (not backend::uses_fft_types<backend_tag>::value){
636 if (std::is_same<backend_tag, backend::fftw_cos>::value or
637 std::is_same<backend_tag, backend::fftw_sin>::value) {
638 scale_factor /= 8.0;
639 }else if (std::is_same<backend_tag, backend::fftw_cos1>::value) {
640 scale_factor = 1.0 / (8.0 * (plan.fft_sizes[0] - 1) * (plan.fft_sizes[1] - 1) * (plan.fft_sizes[2] - 1));
641 }else if (std::is_same<backend_tag, backend::fftw_sin1>::value) {
642 scale_factor = 1.0 / (8.0 * (plan.fft_sizes[0] + 1) * (plan.fft_sizes[1] + 1) * (plan.fft_sizes[2] + 1));
643 }else if(std::is_same<backend_tag, backend::cufft_cos1>::value or
644 std::is_same<backend_tag, backend::rocfft_cos1>::value or
645 std::is_same<backend_tag, backend::stock_cos1>::value){
646 scale_factor = 1.0 / (64.0 * (plan.fft_sizes[0] - 1) * (plan.fft_sizes[1] - 1) * (plan.fft_sizes[2] - 1));
647 }else{
648 scale_factor /= 64.0;
649 }
650 }
651 }
653 std::array<executor_base*, 3> forward_executors() const{
654 return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
655 }
657 std::array<executor_base*, 3> backward_executors() const{
658 return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
659 }
660
662 template<typename scalar_type>
663 void apply_scale(int const batch_size, direction dir, scale scaling, scalar_type data[]) const{
664 if (scaling != scale::none){
665 add_trace name("scale");
666 #ifdef Heffte_ENABLE_MAGMA
667 if (std::is_same<typename backend::buffer_traits<backend_tag>::location, tag::gpu>::value){
668 hmagma.scal(batch_size * ((dir == direction::forward) ? size_outbox() : size_inbox()),
669 get_scale_factor(scaling), data);
670 return;
671 }
672 #endif
674 this->stream(),
675 batch_size * ((dir == direction::forward) ? size_outbox() : size_inbox()),
676 data, get_scale_factor(scaling));
677 }
678 }
679
680 std::unique_ptr<box3d<index>> pinbox, poutbox; // inbox/output for this process
681 double scale_factor;
682 std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
683 std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
684
685 std::array<std::unique_ptr<executor_base>, 3> executors;
686 #ifdef Heffte_ENABLE_MAGMA
687 gpu::magma_handle<typename backend::buffer_traits<backend_tag>::location> hmagma;
688 #endif
689
690 // cache some values for faster read
691 size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
692};
693
702template<typename backend_tag, typename index = int>
704
748template<typename backend_tag, typename index = int>
750
755template<typename backend_tag, typename index>
756fft3d<backend_tag, index> make_fft3d(box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
758 static_assert(std::is_same<index, int>::value or std::is_same<index, long long>::value,
759 "heFFTe works with 'int' and 'long long' indexing only");
761 "the backend_tag is not valid, perhaps it needs to be enabled in the build system");
762 return fft3d<backend_tag, index>(inbox, outbox, comm, options);
763}
764
765}
766
767#endif
Defines the plan for a 3-dimensional discrete Fourier transform performed on a MPI distributed data.
Definition heffte_fft3d.h:240
buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-complex backward FFT using vector API.
Definition heffte_fft3d.h:522
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition heffte_fft3d.h:326
void forward(int const batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload allowing for a batch of FFTs to be performed in a single command.
Definition heffte_fft3d.h:392
fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition heffte_fft3d.h:270
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition heffte_fft3d.h:469
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition heffte_fft3d.h:354
buffer_container< typename transform_output< T, backend_tag >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition heffte_fft3d.h:255
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition heffte_fft3d.h:545
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition heffte_fft3d.h:260
real_buffer_container< scalar_type > backward_real(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-real backward FFT using vector API (truncates the complex part).
Definition heffte_fft3d.h:536
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d.h:311
box3d< index > inbox() const
Returns the inbox.
Definition heffte_fft3d.h:330
box3d< index > outbox() const
Returns the outbox.
Definition heffte_fft3d.h:332
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition heffte_fft3d.h:441
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition heffte_fft3d.h:328
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload with user-provided workspace buffer, see the corresponding overload of forward().
Definition heffte_fft3d.h:481
void backward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload for batch transforms with internally allocated workspace.
Definition heffte_fft3d.h:510
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload utilizing a user-allocated workspace buffer.
Definition heffte_fft3d.h:375
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition heffte_fft3d.h:550
fft3d(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Identical to the other constructor but accepts a GPU stream or queue.
Definition heffte_fft3d.h:292
void backward(int const batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload for batch transforms, see the corresponding overload of forward().
Definition heffte_fft3d.h:495
typename one_dim_backend< backend_tag >::executor backend_executor
Alias to the wrapper class for the one dimensional backend library.
Definition heffte_fft3d.h:243
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d.h:319
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend.
Definition heffte_fft3d.h:251
buffer_container< typename transform_real_type< T, backend_tag >::type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition heffte_fft3d.h:253
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition heffte_fft3d.h:552
void forward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
An overload that allocates workspace internally.
Definition heffte_fft3d.h:407
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d.h:301
fft3d< backend_tag, index > make_fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition heffte_fft3d.h:756
reshape_algorithm
Defines list of potential communication algorithms.
Definition heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition heffte_plan_logic.h:207
plan_options default_options()
Returns the default backend options associated with the given backend.
Definition heffte_plan_logic.h:248
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition heffte_fft3d.h:140
@ none
No scale, leave the result unperturbed similar to the FFTW API.
@ full
Apply the full scale, divide by the number of elements in the world box.
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
std::string name()
Returns the human readable name of the backend.
Definition heffte_common.h:265
std::vector< scalar_type > make_buffer_container(void *, size_t size)
Factory method to create new buffer container for the CPU backends.
Definition heffte_common.h:644
constexpr bool has_executor3d()
Defines whether the executor has a 3D version (single rank).
Definition heffte_common.h:739
constexpr bool has_executor2d()
Defines whether the executor has a 2D version (slabs).
Definition heffte_common.h:715
direction
Indicates the direction of the FFT (internal use only).
Definition heffte_common.h:652
size_t get_max_box_size(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition heffte_utils.h:403
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition heffte_utils.h:424
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition heffte_plan_logic.cpp:425
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition heffte_backend_cuda.h:837
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index > >, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition heffte_reshape3d.h:115
std::unique_ptr< reshape3d_base< index > > make_reshape3d(typename backend::device_instance< typename backend::buffer_traits< backend_tag >::location >::stream_type stream, std::vector< box3d< index > > const &input_boxes, std::vector< box3d< index > > const &output_boxes, MPI_Comm const comm, plan_options const options)
Factory method to create a reshape3d instance.
Definition heffte_reshape3d.h:505
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition heffte_utils.h:355
Defines the container for the temporary buffers.
Definition heffte_common.h:237
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition heffte_common.h:239
Set to true/false type depending whether the types are compatible with the backend transform.
Definition heffte_common.h:445
Holds the auxiliary variables needed by each backend.
Definition heffte_common.h:408
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition heffte_common.h:420
Allows to define whether a specific backend interface has been enabled.
Definition heffte_common.h:226
Defines whether the backend accepts the standard FFT real-complex or complex-complex transform.
Definition heffte_common.h:439
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
Struct to specialize that returns the C++ equivalent of each type.
Definition heffte_utils.h:315
std::complex< double > type
The output for a double data is std::complex<double>
Definition heffte_fft3d.h:76
std::complex< float > type
The output for a float data is std::complex<float>
Definition heffte_fft3d.h:68
Defines the relationship between pairs of input-output types in the FFT algorithms.
Definition heffte_fft3d.h:58
scalar_type type
The output type corresponding to the scalar_type.
Definition heffte_fft3d.h:60
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition heffte_utils.h:252
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition heffte_utils.h:270
The logic plan incorporates the order and types of operations in a transform.
Definition heffte_plan_logic.h:276
Indicates the structure that will be used by the fft backend.
Definition heffte_common.h:663
Defines a set of tweaks and options to use in the plan generation.
Definition heffte_plan_logic.h:131
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition heffte_common.h:38
scalar_type type
The output type corresponding to the scalar_type and backend_tag (Cosine Transform case).
Definition heffte_fft3d.h:103
typename fft_output< scalar_type >::type type
The output type corresponding to the scalar_type and backend_tag (FFT case).
Definition heffte_fft3d.h:94
Defines the relationship between pairs of input-output types in a general transform algorithm.
Definition heffte_fft3d.h:86
scalar_type type
The output type corresponding to the scalar_type and backend_tag (r2r Transform case).
Definition heffte_fft3d.h:130
typename define_standard_type< scalar_type >::type::value_type type
The output type corresponding to the scalar_type and backend_tag (FFT case).
Definition heffte_fft3d.h:121
Defines the relationship between pairs of input-output types in a general transform algorithm.
Definition heffte_fft3d.h:113