Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_r2r_executor.h
1/*
2 -- heFFTe --
3 Univ. of Tennessee, Knoxville
4 @date
5*/
6
7#ifndef HEFFFTE_COS_EXECUTOR_H
8#define HEFFFTE_COS_EXECUTOR_H
9
10#include "heffte_pack3d.h"
11
24
25namespace heffte {
26
31template<typename prepos_processor,typename index>
33 std::array<index, 3> high{box.size[0]-1, box.size[1]-1, box.size[2]-1};
34 high[box.order[0]] = prepos_processor::compute_extended_length(box.osize(0)) - 1;
35 return box3d<index>(std::array<index, 3>{0, 0, 0}, high, box.order);
36}
37
38
45 template<typename precision>
46 static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
47 for(int i = 0; i < length; i++){
48 fft_signal[2*i] = 0;
49 fft_signal[2*i+1] = input[i];
50 }
51 fft_signal[2*length] = 0;
52 for(int i = 0; i < 2*length; i++){
53 fft_signal[4*length-i] = fft_signal[i];
54 }
55 }
56
57 template<typename precision>
58 static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
59 for(int i = 0; i < length; i++){
60 result[i] = std::real(fft_result[i]);
61 }
62 }
63
64 template<typename precision>
65 static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
66 for(int i = 0; i < length; i++){
67 fft_signal[i] = std::complex<precision>(input[i]);
68 }
69 fft_signal[length] = 0.0;
70
71 int index = length-1;
72 for(int i = length+1; i < 2*length+1; i++){
73 fft_signal[i] = std::complex<precision>(-1.0 * input[index]);
74 index --;
75 }
76 }
77
78 template<typename precision>
79 static void post_backward(void*, int length, precision const fft_result[], precision result[]){
80 for(int i=0; i<length; i++)
81 result[i] = fft_result[2*i + 1];
82 }
83
84 static int compute_extended_length(int length){
85 return 4 * length;
86 }
87};
88
95 template<typename precision>
96 static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
97 for(int i=0; i<length; i++){
98 fft_signal[2*i] = 0.0;
99 fft_signal[2*i+1] = input[i];
100 }
101 fft_signal[2*length] = 0.;
102 for(int i=0; i<length; i++){
103 fft_signal[4*length-2*i] = 0.0;
104 fft_signal[4*length-2*i-1]= -input[i];
105 }
106 }
107
108 template<typename precision>
109 static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
110 for(int i=0; i < length; i++)
111 result[i] = -std::imag(fft_result[i+1]);
112 }
113
114 template<typename precision>
115 static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
116 fft_signal[0] = std::complex<precision>(0.0);
117 for(int i=0; i < length; i++){
118 fft_signal[i+1] = std::complex<precision>(0.0, -input[i]);
119 }
120 fft_signal[2*length] = std::complex<precision>(0.0);
121 for(int i=0; i < length-1; i++){
122 fft_signal[length + i + 1] = std::complex<precision>(0.0, -input[length - i - 2]);
123 }
124 }
125
126 template<typename precision>
127 static void post_backward(void*, int length, precision const fft_result[], precision result[]){
128 cpu_cos_pre_pos_processor::post_backward(nullptr, length, fft_result, result);
129 }
130
131 static int compute_extended_length(int length){
133 }
134};
135
142 template<typename precision>
143 static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
144 for (int i = 0; i < length-1; i++){
145 fft_signal[2*i] = input[i];
146 fft_signal[2*i+1] = 0.0;
147 }
148 for (int i = 1; i < length; i++){
149 fft_signal[4*(length-1)-2*i] = input[i];
150 fft_signal[4*(length-1)-2*i+1] = 0.0;
151 }
152 }
153
154 template<typename precision>
155 static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
156 cpu_cos_pre_pos_processor::post_forward(nullptr, length, fft_result, result);
157 }
158
159 template<typename precision>
160 static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
161 for (int i = 0; i < length; i++){
162 fft_signal[i] = std::complex<precision>(input[i], 0.0);
163 }
164 int index = length-2;
165 for (int i = length; i < 2*length-1; i++){
166 fft_signal[i] = std::complex<precision>(input[index], 0.0);
167 index--;
168 }
169 }
170
171 template<typename precision>
172 static void post_backward(void*, int length, precision const fft_result[], precision result[]){
173 for(int i=0; i<length; i++)
174 result[i] = fft_result[2*i];
175 }
176
177 static int compute_extended_length(int length){
178 return 4 * ( length-1 );
179 }
180};
181
183
191template<typename fft_backend_tag, typename prepost_processor>
197
199 template<typename index>
200 real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const box, int dimension) :
201 stream(cstream),
202 length(box.osize(0)),
203 extended_length(prepost_processor::compute_extended_length(box.osize(0))),
204 num_batch(box.osize(1) * box.osize(2)),
205 total_size(box.count()),
206 fft(make_executor_r2c<fft_backend_tag>(stream, make_extended_box<prepost_processor>(box), dimension))
207 {
208 assert(dimension == box.order[0]); // supporting only ordered operations (for now)
209 }
210
211 template<typename index>
212 real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const, int, int) : stream(cstream)
213 { throw std::runtime_error("2D real-to-real transform is not yet implemented!"); }
214
215 template<typename index>
217 { throw std::runtime_error("3D real-to-real transform is not yet implemented!"); }
218
220 template<typename scalar_type>
221 void forward(scalar_type data[], scalar_type workspace[]) const{
222 scalar_type* temp = align<fft_backend_tag>::pntr(workspace);
223 std::complex<scalar_type>* ctemp = align<fft_backend_tag>::pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
224 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
225
226 for(int i=0; i<num_batch; i++){
227 prepost_processor::pre_forward(stream, length, data + i * length, temp + i * extended_length );
228 }
229 fft->forward(temp, ctemp, fft_work);
230 for(int i=0; i<num_batch; i++)
231 prepost_processor::post_forward(stream, length, ctemp + i * ( extended_length/2 + 1 ), data + i * length);
232 }
233
234 template<typename scalar_type>
235 void backward(scalar_type data[], scalar_type workspace[]) const{
236 scalar_type* temp = align<fft_backend_tag>::pntr(workspace);
237 std::complex<scalar_type>* ctemp = align<fft_backend_tag>::pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
238 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
239
240 for(int i=0; i<num_batch; i++)
241 prepost_processor::pre_backward(stream, length, data + i * length, ctemp + i * ( extended_length/2 + 1));
242 fft->backward(ctemp, temp, fft_work);
243 for(int i=0; i<num_batch; i++)
244 prepost_processor::post_backward(stream, length, temp + i * extended_length, data + i * length);
245 }
246
248 template<typename precision>
249 void forward(precision const[], std::complex<precision>[]) const{
250 throw std::runtime_error("Calling cos-transform with real-to-complex data! This should not happen!");
251 }
252
253 template<typename precision>
254 void backward(std::complex<precision> indata[], precision outdata[]) const{ forward(outdata, indata); }
255
257 int box_size() const override{ return total_size; }
259 size_t workspace_size() const override{
260 return fft->box_size() + 2 * fft->complex_size() + 2 * fft->workspace_size()
261 + ((std::is_same<fft_backend_tag, backend::cufft>::value) ? 2 : 1 );
262 }
263
264 virtual void forward(float data[], float *workspace) const override{ forward<float>(data, workspace); }
266 virtual void forward(double data[], double *workspace) const override{ forward<double>(data, workspace); }
268 virtual void backward(float data[], float *workspace) const override{ backward<float>(data, workspace); }
270 virtual void backward(double data[], double *workspace) const override{ backward<double>(data, workspace); }
271
272private:
274
275 int length, extended_length, num_batch, total_size;
276
277 std::unique_ptr<typename one_dim_backend<fft_backend_tag>::executor_r2c> fft;
278};
279
280
281}
282
283#endif
Base class for all backend executors.
Definition heffte_common.h:561
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition heffte_common.h:570
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition heffte_common.h:566
@ backward
Inverse DFT transform.
Definition heffte_common.h:656
@ forward
Forward DFT transform.
Definition heffte_common.h:654
box3d< index > make_extended_box(box3d< index > const &box)
Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
Definition heffte_r2r_executor.h:32
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
static float * pntr(float *p)
Align for float.
Definition heffte_common.h:604
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition heffte_common.h:239
Holds the auxiliary variables needed by each backend.
Definition heffte_common.h:408
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition heffte_geometry.h:129
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition heffte_geometry.h:131
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition heffte_geometry.h:123
Pre/Post processing for the Cosine transform type I using the CPU.
Definition heffte_r2r_executor.h:140
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:143
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:155
static int compute_extended_length(int length)
Computes the length of the extended signal.
Definition heffte_r2r_executor.h:177
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:172
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:160
Pre/Post processing for the Cosine transform using the CPU.
Definition heffte_r2r_executor.h:43
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:58
static int compute_extended_length(int length)
Computes the length of the extended FFT signal.
Definition heffte_r2r_executor.h:84
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:46
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:79
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:65
Definition heffte_r2r_executor.h:182
Pre/Post processing for the Sine transform using the CPU.
Definition heffte_r2r_executor.h:93
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:109
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:115
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:127
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:96
static int compute_extended_length(int length)
Computes the length of the extended FFT signal.
Definition heffte_r2r_executor.h:131
int box_size() const override
Returns the size of the box.
Definition heffte_r2r_executor.h:257
size_t workspace_size() const override
Returns the size of the box.
Definition heffte_r2r_executor.h:259
void forward(scalar_type data[], scalar_type workspace[]) const
Forward transform.
Definition heffte_r2r_executor.h:221
virtual void forward(double data[], double *workspace) const override
Forward r2r, double precision.
Definition heffte_r2r_executor.h:266
void backward(scalar_type data[], scalar_type workspace[]) const
Inverse transform.
Definition heffte_r2r_executor.h:235
virtual void backward(double data[], double *workspace) const override
Backward r2r, double precision.
Definition heffte_r2r_executor.h:270
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const box, int dimension)
Construct a plan for batch 1D transforms.
Definition heffte_r2r_executor.h:200
void backward(std::complex< precision > indata[], precision outdata[]) const
Placeholder for template type consistency, should never be called.
Definition heffte_r2r_executor.h:254
virtual void backward(float data[], float *workspace) const override
Backward r2r, single precision.
Definition heffte_r2r_executor.h:268
void forward(precision const[], std::complex< precision >[]) const
Placeholder for template type consistency, should never be called.
Definition heffte_r2r_executor.h:249
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const, int, int)
Construct a plan for batch 2D transforms, not implemented currently.
Definition heffte_r2r_executor.h:212
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const)
Construct a plan for a single 3D transform, not implemented currently.
Definition heffte_r2r_executor.h:216
virtual void forward(float data[], float *workspace) const override
Forward r2r, single precision.
Definition heffte_r2r_executor.h:264