Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_fft3d_r2c.h
1/*
2 -- heFFTe --
3 Univ. of Tennessee, Knoxville
4 @date
5*/
6
7#ifndef HEFFTE_FFT3D_R2C_H
8#define HEFFTE_FFT3D_R2C_H
9
10#include "heffte_fft3d.h"
11
12namespace heffte {
13
45template<typename backend_tag, typename index = int>
46class fft3d_r2c : public backend::device_instance<typename backend::buffer_traits<backend_tag>::location>{
47public:
56
60 template<typename T> using buffer_container = typename backend::buffer_traits<backend_tag>::template container<T>;
65
76 fft3d_r2c(box3d<index> const inbox, box3d<index> const outbox, int r2c_direction, MPI_Comm const comm,
78 fft3d_r2c(plan_operations(mpi::gather_boxes(inbox, outbox, comm), r2c_direction, set_options<backend_tag, true>(options), mpi::comm_rank(comm)), comm){
79 assert(r2c_direction == 0 or r2c_direction == 1 or r2c_direction == 2);
80 static_assert(backend::is_enabled<backend_tag>::value, "The requested backend is invalid or has not been enabled.");
81 }
82
86 box3d<index> const inbox, box3d<index> const outbox, int r2c_direction, MPI_Comm const comm,
88 fft3d_r2c(gpu_stream,
89 plan_operations(mpi::gather_boxes(inbox, outbox, comm), r2c_direction, set_options<backend_tag, true>(options), mpi::comm_rank(comm)),
90 comm){
91 assert(r2c_direction == 0 or r2c_direction == 1 or r2c_direction == 2);
92 static_assert(backend::is_enabled<backend_tag>::value, "The requested backend is invalid or has not been enabled.");
93 }
94
96 fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
97 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
98 int r2c_direction, MPI_Comm const comm,
99 bool use_reorder, int algorithm, bool use_pencils)
100 : fft3d_r2c(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
101 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
102 r2c_direction, comm,
103 plan_options(use_reorder, static_cast<reshape_algorithm>(algorithm), use_pencils))
104 {}
105
106 fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
107 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
108 int r2c_direction, MPI_Comm const comm)
109 : fft3d_r2c(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
110 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
111 r2c_direction, comm)
112 {}
113
114 fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2,
115 int ol0, int ol1, int ol2, int oh0, int oh1, int oh2,
116 int r2c_direction, MPI_Comm const comm)
117 : fft3d_r2c(box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), r2c_direction, comm)
118 {}
119
121 long long size_inbox() const{ return pinbox->count(); }
123 long long size_outbox() const{ return poutbox->count(); }
125 box3d<index> inbox() const{ return *pinbox; }
127 box3d<index> outbox() const{ return *poutbox; }
129 size_t size_workspace() const{ return size_buffer_work; }
131 size_t size_comm_buffers() const{ return comm_buffer_offset; }
132
146 template<typename input_type, typename output_type>
147 void forward(input_type const input[], output_type output[], scale scaling = scale::none) const{
148 static_assert((std::is_same<input_type, float>::value and is_ccomplex<output_type>::value)
149 or (std::is_same<input_type, double>::value and is_zcomplex<output_type>::value),
150 "Using either an unknown complex type or an incompatible pair of types!");
151
152 auto workspace = make_buffer_container<output_type>(this->stream(), size_workspace());
153 forward(input, output, workspace.data(), scaling);
154 }
155
157 template<typename input_type, typename output_type>
158 void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling = scale::none) const{
159 static_assert((std::is_same<input_type, float>::value and is_ccomplex<output_type>::value)
160 or (std::is_same<input_type, double>::value and is_zcomplex<output_type>::value),
161 "Using either an unknown complex type or an incompatible pair of types!");
162
163 compute_transform<location_tag, index>(this->stream(), 1, convert_to_standard(input), convert_to_standard(output),
164 convert_to_standard(workspace),
165 executor_buffer_offset, size_comm_buffers(), forward_shaper,
166 forward_executors(), direction::forward);
167 apply_scale(1, direction::forward, scaling, convert_to_standard(output));
168 }
169
170 template<typename input_type, typename output_type>
171 void forward(int batch_size, input_type const input[], output_type output[],
172 output_type workspace[], scale scaling = scale::none) const{
173 static_assert((std::is_same<input_type, float>::value and is_ccomplex<output_type>::value)
174 or (std::is_same<input_type, double>::value and is_zcomplex<output_type>::value),
175 "Using either an unknown complex type or an incompatible pair of types!");
176
177 compute_transform<location_tag, index>(this->stream(), batch_size, convert_to_standard(input), convert_to_standard(output),
178 convert_to_standard(workspace),
179 executor_buffer_offset, size_comm_buffers(), forward_shaper,
180 forward_executors(), direction::forward);
181 apply_scale(batch_size, direction::forward, scaling, convert_to_standard(output));
182 }
183
184 template<typename input_type, typename output_type>
185 void forward(int batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{
186 static_assert((std::is_same<input_type, float>::value and is_ccomplex<output_type>::value)
187 or (std::is_same<input_type, double>::value and is_zcomplex<output_type>::value),
188 "Using either an unknown complex type or an incompatible pair of types!");
189
190 auto workspace = make_buffer_container<output_type>(this->stream(), batch_size * size_workspace());
191 forward(batch_size, input, output, workspace.data(), scaling);
192 }
193
210 template<typename input_type>
212 if (input.size() < static_cast<size_t>(size_inbox()))
213 throw std::invalid_argument("The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
214 static_assert(std::is_same<input_type, float>::value or std::is_same<input_type, double>::value,
215 "The input to forward() must be real, i.e., either float or double.");
217 forward(input.data(), output.data(), scaling);
218 return output;
219 }
220
234 template<typename input_type, typename output_type>
235 void backward(input_type const input[], output_type output[], scale scaling = scale::none) const{
236 static_assert((std::is_same<output_type, float>::value and is_ccomplex<input_type>::value)
237 or (std::is_same<output_type, double>::value and is_zcomplex<input_type>::value),
238 "Using either an unknown complex type or an incompatible pair of types!");
239
240 auto workspace = make_buffer_container<input_type>(this->stream(), size_workspace());
241 backward(input, output, workspace.data(), scaling);
242 }
243
245 template<typename input_type, typename output_type>
246 void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling = scale::none) const{
247 static_assert((std::is_same<output_type, float>::value and is_ccomplex<input_type>::value)
248 or (std::is_same<output_type, double>::value and is_zcomplex<input_type>::value),
249 "Using either an unknown complex type or an incompatible pair of types!");
250
251 compute_transform<location_tag, index>(this->stream(), 1, convert_to_standard(input), convert_to_standard(output),
252 convert_to_standard(workspace),
253 executor_buffer_offset, size_comm_buffers(), backward_shaper,
254 backward_executors(), direction::backward);
255 apply_scale(1, direction::backward, scaling, convert_to_standard(output));
256 }
257
258 template<typename input_type, typename output_type>
259 void backward(int batch_size, input_type const input[], output_type output[],
260 input_type workspace[], scale scaling = scale::none) const{
261 static_assert((std::is_same<output_type, float>::value and is_ccomplex<input_type>::value)
262 or (std::is_same<output_type, double>::value and is_zcomplex<input_type>::value),
263 "Using either an unknown complex type or an incompatible pair of types!");
264
265 compute_transform<location_tag, index>(this->stream(), batch_size, convert_to_standard(input), convert_to_standard(output),
266 convert_to_standard(workspace),
267 executor_buffer_offset, size_comm_buffers(), backward_shaper,
268 backward_executors(), direction::backward);
269 apply_scale(batch_size, direction::backward, scaling, convert_to_standard(output));
270 }
271
272 template<typename input_type, typename output_type>
273 void backward(int batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{
274 static_assert((std::is_same<output_type, float>::value and is_ccomplex<input_type>::value)
275 or (std::is_same<output_type, double>::value and is_zcomplex<input_type>::value),
276 "Using either an unknown complex type or an incompatible pair of types!");
277
278 auto workspace = make_buffer_container<input_type>(this->stream(), batch_size * size_workspace());
279 backward(batch_size, input, output, workspace.data(), scaling);
280 }
281
285 template<typename scalar_type>
288 "Either calling backward() with non-complex input or using an unknown complex type.");
290 backward(input.data(), result.data(), scaling);
291 return result;
292 }
293
297 double get_scale_factor(scale scaling) const{ return (scaling == scale::symmetric) ? std::sqrt(scale_factor) : scale_factor; }
298
299private:
301 fft3d_r2c(logic_plan3d<index> const &plan, MPI_Comm const comm) :
302 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
303 scale_factor(1.0 / static_cast<double>(plan.index_count))
304 #ifdef Heffte_ENABLE_MAGMA
305 , hmagma(this->stream())
306 #endif
307 {
308 setup(plan, comm);
309 }
310
313 logic_plan3d<index> const &plan, MPI_Comm const comm) :
314 backend::device_instance<location_tag>(gpu_stream),
315 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
316 scale_factor(1.0 / static_cast<double>(plan.index_count))
317 #ifdef Heffte_ENABLE_MAGMA
318 , hmagma(this->stream())
319 #endif
320 {
321 setup(plan, comm);
322 }
323
325 void setup(logic_plan3d<index> const &plan, MPI_Comm const comm){
326 for(int i=0; i<4; i++){
327 forward_shaper[i] = make_reshape3d<backend_tag>(this->stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
328 backward_shaper[3-i] = make_reshape3d<backend_tag>(this->stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
329 }
330
331 executors[0] = make_executor_r2c<backend_tag>(this->stream(), plan.out_shape[0][mpi::comm_rank(comm)], plan.fft_direction[0]);
332 executors[1] = make_executor<backend_tag>(this->stream(), plan.out_shape[1][mpi::comm_rank(comm)], plan.fft_direction[1]);
333 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][mpi::comm_rank(comm)], plan.fft_direction[2]);
334
335 size_t executor_workspace_size = get_max_work_size(executors);
336 comm_buffer_offset = std::max(get_workspace_size(forward_shaper), get_workspace_size(backward_shaper));
337 size_buffer_work = comm_buffer_offset
338 + get_max_box_size_r2c(executors) + executor_workspace_size;
339 executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
340 }
342 std::array<executor_base*, 3> forward_executors() const{
343 return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
344 }
346 std::array<executor_base*, 3> backward_executors() const{
347 return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
348 }
349
351 template<typename scalar_type>
352 void apply_scale(int const batch_size, direction dir, scale scaling, scalar_type data[]) const{
353 if (scaling != scale::none){
354 add_trace name("scale");
355 #ifdef Heffte_ENABLE_MAGMA
356 if (std::is_same<location_tag, tag::gpu>::value){
357 hmagma.scal(batch_size * ((dir == direction::forward) ? size_outbox() : size_inbox()), get_scale_factor(scaling), data);
358 return;
359 }
360 #endif
362 this->stream(),
363 batch_size * ((dir == direction::forward) ? size_outbox() : size_inbox()),
364 data, get_scale_factor(scaling));
365 }
366 }
367
368 std::unique_ptr<box3d<index>> pinbox, poutbox;
369 double scale_factor;
370 std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
371 std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
372
373 std::array<std::unique_ptr<executor_base>, 3> executors;
374 #ifdef Heffte_ENABLE_MAGMA
375 gpu::magma_handle<location_tag> hmagma;
376 #endif
377
378 // cache some values for faster read
379 size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
380};
381
386template<typename backend_tag, typename index = int>
388
393template<typename backend_tag, typename index>
395 int r2c_direction, MPI_Comm const comm,
397 static_assert(std::is_same<index, int>::value or std::is_same<index, long long>::value,
398 "heFFTe works with 'int' and 'long long' indexing only");
400 "The backend_tag is not valid, perhaps it needs to be enabled in the build system");
401 return fft3d_r2c<backend_tag, index>(inbox, outbox, r2c_direction, comm, options);
402}
403
404}
405
406#endif
Similar to heffte::fft3d, but computed fewer redundant coefficients when the input is real.
Definition heffte_fft3d_r2c.h:46
buffer_container< typename define_standard_type< T >::type::value_type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition heffte_fft3d_r2c.h:62
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:96
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition heffte_fft3d_r2c.h:147
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend (allows for RAII memory management).
Definition heffte_fft3d_r2c.h:60
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition heffte_fft3d_r2c.h:131
void backward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload that performs a batch transform using internally allocated workspace.
Definition heffte_fft3d_r2c.h:273
typename one_dim_backend< backend_tag >::executor_r2c backend_executor_r2c
FFT executor for the real-to-complex dimension.
Definition heffte_fft3d_r2c.h:51
box3d< index > inbox() const
Definition heffte_fft3d_r2c.h:125
fft3d_r2c(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
See the documentation for fft3d::fft3d()
Definition heffte_fft3d_r2c.h:85
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition heffte_fft3d_r2c.h:121
fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition heffte_fft3d_r2c.h:76
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition heffte_fft3d_r2c.h:55
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition heffte_fft3d_r2c.h:123
buffer_container< typename fft_output< T >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition heffte_fft3d_r2c.h:64
box3d< index > outbox() const
Definition heffte_fft3d_r2c.h:127
void forward(int batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a batch transform.
Definition heffte_fft3d_r2c.h:171
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition heffte_fft3d_r2c.h:211
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:114
void backward(int batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload that performs a batch transform.
Definition heffte_fft3d_r2c.h:259
real_buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Variant of backward() that uses buffer_container for RAII style of resource management.
Definition heffte_fft3d_r2c.h:286
typename one_dim_backend< backend_tag >::executor backend_executor_c2c
FFT executor for the complex-to-complex dimensions.
Definition heffte_fft3d_r2c.h:49
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition heffte_fft3d_r2c.h:158
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition heffte_fft3d_r2c.h:129
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition heffte_fft3d_r2c.h:106
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition heffte_fft3d_r2c.h:246
void forward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload utilizing a batch transform using internally allocated workspace.
Definition heffte_fft3d_r2c.h:185
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition heffte_fft3d_r2c.h:297
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition heffte_fft3d_r2c.h:235
reshape_algorithm
Defines list of potential communication algorithms.
Definition heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition heffte_plan_logic.h:207
plan_options default_options()
Returns the default backend options associated with the given backend.
Definition heffte_plan_logic.h:248
fft3d_r2c< backend_tag, index > fft2d_r2c
Alias of heffte::fft2d to be used for a two dimensional problem.
Definition heffte_fft3d_r2c.h:387
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition heffte_fft3d.h:140
fft3d_r2c< backend_tag, index > make_fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition heffte_fft3d_r2c.h:394
@ none
No scale, leave the result unperturbed similar to the FFTW API.
Definition heffte_fft3d.h:142
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
Definition heffte_fft3d.h:146
std::string name()
Returns the human readable name of the backend.
Definition heffte_common.h:265
std::vector< scalar_type > make_buffer_container(void *, size_t size)
Factory method to create new buffer container for the CPU backends.
Definition heffte_common.h:644
direction
Indicates the direction of the FFT (internal use only).
Definition heffte_common.h:652
size_t get_max_box_size_r2c(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition heffte_utils.h:414
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition heffte_utils.h:424
@ backward
Inverse DFT transform.
Definition heffte_common.h:656
@ forward
Forward DFT transform.
Definition heffte_common.h:654
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition heffte_plan_logic.cpp:425
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition heffte_backend_cuda.h:837
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition heffte_utils.h:79
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index > >, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition heffte_reshape3d.h:115
std::unique_ptr< reshape3d_base< index > > make_reshape3d(typename backend::device_instance< typename backend::buffer_traits< backend_tag >::location >::stream_type stream, std::vector< box3d< index > > const &input_boxes, std::vector< box3d< index > > const &output_boxes, MPI_Comm const comm, plan_options const options)
Factory method to create a reshape3d instance.
Definition heffte_reshape3d.h:505
Wrappers to miscellaneous MPI methods giving a more C++-ish interface.
Definition heffte_geometry.h:692
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition heffte_utils.h:355
Defines the container for the temporary buffers.
Definition heffte_common.h:237
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition heffte_common.h:239
Holds the auxiliary variables needed by each backend.
Definition heffte_common.h:408
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition heffte_common.h:420
Allows to define whether a specific backend interface has been enabled.
Definition heffte_common.h:226
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition heffte_utils.h:252
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition heffte_utils.h:270
The logic plan incorporates the order and types of operations in a transform.
Definition heffte_plan_logic.h:276
Indicates the structure that will be used by the fft backend.
Definition heffte_common.h:663
Defines a set of tweaks and options to use in the plan generation.
Definition heffte_plan_logic.h:131