7#ifndef HEFFTE_BACKEND_STOCK_FFT_H
8#define HEFFTE_BACKEND_STOCK_FFT_H
10#include "heffte_r2r_executor.h"
12#include "stock_fft/heffte_stock_tree.h"
54template<>
struct is_ccomplex<stock::Complex<float, 1>> : std::true_type{};
55template<>
struct is_zcomplex<stock::Complex<double, 1>> : std::true_type{};
56#ifdef Heffte_ENABLE_AVX
61template<>
struct is_ccomplex<stock::Complex<float, 4>> : std::true_type{};
62template<>
struct is_ccomplex<stock::Complex<float, 8>> : std::true_type{};
63#ifdef Heffte_ENABLE_AVX512
64template<>
struct is_ccomplex<stock::Complex<float, 16>> : std::true_type{};
71template<>
struct is_zcomplex<stock::Complex<double, 2>> : std::true_type{};
72template<>
struct is_zcomplex<stock::Complex<double, 4>> : std::true_type{};
73#ifdef Heffte_ENABLE_AVX512
74template<>
struct is_zcomplex<stock::Complex<double, 8>> : std::true_type{};
78#ifdef Heffte_ENABLE_AVX
81template<
typename F,
int L>
84 throw std::runtime_error(
"Invalid padding requested!\n");
86 std::complex<F> ret[L];
87 for(
int i = 0; i < c_len; i++) ret[i] = c[i*i_stride];
91template<
typename F,
int L>
94 throw std::runtime_error(
"Invalid padding requested!\n");
96 std::complex<F> ret[L];
97 for(
int i = 0; i < c_len; i++) ret[i] = std::complex<F>(c[i*i_stride], 0.0);
101template<
typename F>
struct pack_size { };
102#ifdef Heffte_ENABLE_AVX512
103template<>
struct pack_size<float> {
constexpr static int size = 16;};
104template<>
struct pack_size<double>{
constexpr static int size = 8;};
106template<>
struct pack_size<float> {
constexpr static int size = 8;};
107template<>
struct pack_size<double>{
constexpr static int size = 4;};
119template<
typename F, direction dir>
130 plan_stock_fft(
int size,
int howmanyffts,
int stride,
int rdist,
int cdist):
131 N(size), num_ffts(howmanyffts), stride_sz(stride), real_d(rdist), comp_d(cdist) {
132 numNodes = stock::getNumNodes(
N);
133 root = std::unique_ptr<stock::biFuncNode<F,L>[]>(
new stock::biFuncNode<F,L>[numNodes]);
134 init_fft_tree(root.get(),
N);
137 int N, num_ffts, stride_sz, real_d, comp_d, numNodes;
138 constexpr static int L = pack_size<F>::size;
139 std::unique_ptr<stock::biFuncNode<F,L>[]> root;
141 void execute(std::complex<F>
const idata[], F odata[]) {
143 stock::complex_vector<F,L> inp (
N);
144 stock::complex_vector<F,L> out (
N);
147 for(
int p = 0; p+((L/2)-1) < num_ffts; p += (L/2)) {
149 inp[0] = stock::Complex<F,L> (&idata[p*comp_d], comp_d);
150 for(
int i = 1; i < (
N+2)/2; i++) {
151 int idx = p*comp_d + i*stride_sz;
152 inp[i] = stock::Complex<F,L> (&idata[idx], comp_d);
153 inp[
N-i] = inp[i].conjugate();
156 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
158 for(
int i = 0; i <
N; i++) {
159 int idx = p*real_d + i*stride_sz;
160 stock::Complex<F,L> arr = out[i];
161 for(
int j = 0; j < L/2; j++) odata[idx+j*real_d] = arr[j].real();
166 if(num_ffts % (L/2) > 0) {
167 int rem = num_ffts % (L/2);
169 int p = num_ffts - rem;
170 inp[0] = copy_pad<F,L>(&idata[p*comp_d], rem, comp_d);
171 for(
int i = 1; i < (
N+2)/2; i++) {
172 int idx = p*comp_d + i*stride_sz;
173 inp[i] = copy_pad<F,L>(&idata[idx], rem, comp_d);
174 inp[
N-i] = inp[i].conjugate();
176 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
177 for(
int i = 0; i <
N; i++) {
178 int idx = p*real_d + i*stride_sz;
179 stock::Complex<F,L> arr = out[i];
181 for(
int k = 0; k < rem; k++) {
182 odata[idx + k*real_d] = arr[k].real();
188 void execute(F
const idata[], std::complex<F> odata[]) {
190 stock::complex_vector<F,L> inp (
N);
191 stock::complex_vector<F,L> out (
N);
194 for(
int p = 0; p+((L/2)-1) < num_ffts; p += L/2) {
196 for(
int i = 0; i <
N; i++) {
197 int idx = p*real_d + i*stride_sz;
198 inp[i] = copy_pad<F,L>(&idata[idx], L/2, real_d);
201 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
203 for(
int i = 0; i < (
N+2)/2; i++) {
204 int idx = p*comp_d + i*stride_sz;
205 stock::Complex<F,L> arr = out[i];
206 for(
int j = 0; j < L/2; j++) odata[idx+j*comp_d] = arr[j];
211 if(num_ffts % (L/2) > 0) {
212 int rem = num_ffts % (L/2);
214 int p = num_ffts - rem;
215 for(
int i = 0; i <
N; i++) {
216 int idx = p*real_d + i*stride_sz;
218 inp[i] = copy_pad<F,L>(&idata[idx], rem, real_d);
220 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
221 for(
int i = 0; i < (
N+2)/2; i++) {
222 int idx = p*comp_d + i*stride_sz;
223 stock::Complex<F,L> arr = out[i];
225 for(
int k = 0; k < rem; k++) {
226 odata[idx + k*comp_d] = arr[k];
239template<
typename F, direction dir>
250 N(size), num_ffts(howmanyffts), stride_sz(stride), idist(dist), odist(dist) {
251 numNodes = stock::getNumNodes(
N);
252 root = std::unique_ptr<stock::biFuncNode<F,L>[]>(
new stock::biFuncNode<F,L>[numNodes]);
253 init_fft_tree(root.get(),
N);
256 int N, num_ffts, stride_sz, idist, odist, numNodes;
257 constexpr static int L = pack_size<F>::size;
258 std::unique_ptr<stock::biFuncNode<F, L>[]> root;
260 void execute(std::complex<F> data[]) {
262 stock::complex_vector<F,L> inp (
N);
263 stock::complex_vector<F,L> out (
N);
266 for(
int p = 0; p + (L/2 - 1) < num_ffts; p += L/2) {
268 for(
int i = 0; i <
N; i++) {
269 int idx = p*idist + i*stride_sz;
270 inp[i] = stock::Complex<F,L> (&data[idx], idist);
273 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
275 for(
int i = 0; i <
N; i++) {
276 int idx = p*odist + i*stride_sz;
277 stock::Complex<F,L> arr = out[i];
278 for(
int j = 0; j < L/2; j++) data[idx+j*odist] = arr[j];
283 if(num_ffts % (L/2) > 0) {
284 int rem = num_ffts % (L/2);
286 int p = num_ffts - rem;
287 for(
int i = 0; i <
N; i++) {
288 int idx = p*idist + i*stride_sz;
290 inp[i] = copy_pad<F,L>(&data[idx], rem, idist);
292 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
293 for(
int i = 0; i <
N; i++) {
294 int idx = p*odist + i*stride_sz;
295 stock::Complex<F,L> arr = out[i];
297 for(
int k = 0; k < rem; k++) {
298 data[idx + k*odist] = arr[k];
313template<
typename F, direction dir>
325 N(size), num_ffts(howmanyffts), stride_sz(stride), real_d(rdist), comp_d(cdist) {
326 numNodes = stock::getNumNodes(
N);
328 init_fft_tree(root.get(),
N);
331 int N, num_ffts, stride_sz, real_d, comp_d, numNodes;
332 std::unique_ptr<stock::biFuncNode<F, 1>[]> root;
334 void execute(std::complex<F>
const idata[], F odata[]) {
336 stock::complex_vector<F,1> inp (
N);
337 stock::complex_vector<F,1> out (
N);
340 for(
int p = 0; p < num_ffts; p++) {
343 for(
int i = 1; i < (
N+2)/2; i++) {
344 int idx = p*comp_d + i*stride_sz;
349 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
351 for(
int i = 0; i <
N; i++) {
352 int idx = p*real_d + i*stride_sz;
354 odata[idx+0*real_d] = arr[0].real();
359 void execute(F
const idata[], std::complex<F> odata[]) {
361 stock::complex_vector<F,1> inp (
N);
362 stock::complex_vector<F,1> out (
N);
365 for(
int p = 0; p < num_ffts; p++) {
367 for(
int i = 0; i <
N; i++) {
368 int idx = p*real_d + i*stride_sz;
372 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
374 for(
int i = 0; i < (
N+2)/2; i++) {
375 int idx = p*comp_d + i*stride_sz;
377 odata[idx+0*comp_d] = arr[0];
389template<
typename F, direction dir>
400 N(size), num_ffts(howmanyffts), stride_sz(stride), idist(dist), odist(dist) {
401 numNodes = stock::getNumNodes(N);
403 init_fft_tree(root.get(), N);
406 int N, num_ffts, stride_sz, idist, odist, numNodes;
407 std::unique_ptr<stock::biFuncNode<F,1>[]> root;
411 stock::complex_vector<F,1> inp (N);
412 stock::complex_vector<F,1> out (N);
415 for(
int p = 0; p < num_ffts; p++) {
417 for(
int i = 0; i < N; i++) {
418 int idx = p*idist + i*stride_sz;
422 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
424 for(
int i = 0; i < N; i++) {
425 int idx = p*odist + i*stride_sz;
427 data[idx+0*odist] = arr[0];
455 template<
typename index>
457 size(box.size[dimension]),
460 dist((dimension == box.order[0]) ? size : 1),
461 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
462 block_stride(box.osize(0) * box.osize(1)),
463 total_size(box.count())
467 throw std::runtime_error(
"2D transforms for the stock backend are not available yet!");
471 throw std::runtime_error(
"3D transforms for the stock backend are not available yet!");
475 void forward(std::complex<float> data[], std::complex<float>*)
const override{
477 for(
int i=0; i<blocks; i++) {
478 cforward->execute(data + i * block_stride);
482 void backward(std::complex<float> data[], std::complex<float>*)
const override{
483 make_plan(cbackward);
484 for(
int i=0; i<blocks; i++) {
485 cbackward->execute(data + i * block_stride);
489 void forward(std::complex<double> data[], std::complex<double>*)
const override{
491 for(
int i=0; i<blocks; i++) {
492 zforward->execute(data + i * block_stride);
496 void backward(std::complex<double> data[], std::complex<double>*)
const override{
497 make_plan(zbackward);
498 for(
int i=0; i<blocks; i++) {
499 zbackward->execute(data + i * block_stride);
504 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float> *workspace)
const override{
505 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<float>(indata[i]);
509 void backward(std::complex<float> indata[],
float outdata[], std::complex<float> *workspace)
const override{
511 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
514 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double> *workspace)
const override{
515 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<double>(indata[i]);
519 void backward(std::complex<double> indata[],
double outdata[], std::complex<double> *workspace)
const override{
521 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
525 int box_size()
const override{
return total_size; }
532 template<
typename scalar_type, direction dir>
537 int size, num_ffts, stride, dist, blocks, block_stride, total_size;
538 mutable std::unique_ptr<plan_stock_fft<std::complex<float>,
direction::forward>> cforward;
539 mutable std::unique_ptr<plan_stock_fft<std::complex<float>,
direction::backward>> cbackward;
540 mutable std::unique_ptr<plan_stock_fft<std::complex<double>,
direction::forward>> zforward;
541 mutable std::unique_ptr<plan_stock_fft<std::complex<double>,
direction::backward>> zbackward;
563 template<
typename index>
565 size(box.size[dimension]),
568 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
569 rdist((dimension == box.order[0]) ? size : 1),
570 cdist((dimension == box.order[0]) ? size/2 + 1 : 1),
571 rblock_stride(box.osize(0) * box.osize(1)),
572 cblock_stride(box.osize(0) * (box.osize(1)/2 + 1)),
574 csize(box.r2c(dimension).count())
578 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float>*)
const override{
580 for(
int i=0; i<blocks; i++) {
581 sforward->execute(indata + i*rblock_stride, outdata + i*cblock_stride);
585 void backward(std::complex<float> indata[],
float outdata[], std::complex<float>*)
const override{
586 make_plan(sbackward);
587 for(
int i=0; i<blocks; i++) {
588 sbackward->execute(indata + i*cblock_stride, outdata + i*rblock_stride);
592 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double>*)
const override{
594 for(
int i=0; i<blocks; i++) {
595 dforward->execute(indata + i*rblock_stride, outdata + i*cblock_stride);
599 void backward(std::complex<double> indata[],
double outdata[], std::complex<double>*)
const override{
600 make_plan(dbackward);
601 for(
int i=0; i<blocks; i++) {
602 dbackward->execute(indata + i*cblock_stride, outdata + i*rblock_stride);
615 template<
typename scalar_type, direction dir>
620 int size, num_ffts, stride, blocks;
621 int rdist, cdist, rblock_stride, cblock_stride, rsize, csize;
622 mutable std::unique_ptr<plan_stock_fft<float, direction::forward>> sforward;
623 mutable std::unique_ptr<plan_stock_fft<double, direction::forward>> dforward;
624 mutable std::unique_ptr<plan_stock_fft<float, direction::backward>> sbackward;
625 mutable std::unique_ptr<plan_stock_fft<double, direction::backward>> dbackward;
Base class for all backend executors.
Definition heffte_common.h:561
virtual int complex_size() const
Return the size of the complex-box (r2c executors).
Definition heffte_common.h:594
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition heffte_common.h:570
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition heffte_common.h:566
Custom complex type taking advantage of vectorization A Complex Type intrinsic to HeFFTe that takes a...
Definition heffte_stock_complex.h:29
Complex< F, L > conjugate()
Conjugate the current complex number.
Definition heffte_stock_complex.h:183
Wrapper to stock API for real-to-complex transform with shortening of the data.
Definition heffte_backend_stock.h:552
int box_size() const override
Returns the size of the box with real data.
Definition heffte_backend_stock.h:607
int complex_size() const override
Returns the size of the box with complex coefficients.
Definition heffte_backend_stock.h:609
size_t workspace_size() const override
Return the size of the needed workspace.
Definition heffte_backend_stock.h:611
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *) const override
Backward transform, single precision.
Definition heffte_backend_stock.h:585
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *) const override
Forward transform, single precision.
Definition heffte_backend_stock.h:578
stock_fft_executor_r2c(void *, box3d< index > const box, int dimension)
Constructor defines the box and the dimension of reduction.
Definition heffte_backend_stock.h:564
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *) const override
Forward transform, double precision.
Definition heffte_backend_stock.h:592
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *) const override
Backward transform, double precision.
Definition heffte_backend_stock.h:599
Wrapper around the Stock FFT API.
Definition heffte_backend_stock.h:446
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *workspace) const override
Converts the real data to complex and performs float-complex forward transform.
Definition heffte_backend_stock.h:504
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *workspace) const override
Performs backward float-complex transform and truncates the complex part of the result.
Definition heffte_backend_stock.h:509
size_t workspace_size() const override
Return the size of the needed workspace.
Definition heffte_backend_stock.h:528
void forward(std::complex< double > data[], std::complex< double > *) const override
Forward fft, double-complex case.
Definition heffte_backend_stock.h:489
void forward(std::complex< float > data[], std::complex< float > *) const override
Forward fft, float-complex case.
Definition heffte_backend_stock.h:475
void backward(std::complex< double > data[], std::complex< double > *) const override
Backward fft, double-complex case.
Definition heffte_backend_stock.h:496
stock_fft_executor(void *, box3d< index > const, int, int)
Placeholder, unimplemented.
Definition heffte_backend_stock.h:466
void backward(std::complex< float > data[], std::complex< float > *) const override
Backward fft, float-complex case.
Definition heffte_backend_stock.h:482
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *workspace) const override
Converts the real data to complex and performs double-complex forward transform.
Definition heffte_backend_stock.h:514
stock_fft_executor(void *, box3d< index > const)
Placeholder, unimplemented.
Definition heffte_backend_stock.h:470
int box_size() const override
Returns the size of the box.
Definition heffte_backend_stock.h:525
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *workspace) const override
Performs backward double-complex transform and truncates the complex part of the result.
Definition heffte_backend_stock.h:519
stock_fft_executor(void *, box3d< index > const box, int dimension)
Constructor, specifies the box and dimension.
Definition heffte_backend_stock.h:456
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition heffte_geometry.h:169
@ backward
Inverse DFT transform.
Definition heffte_common.h:656
@ forward
Forward DFT transform.
Definition heffte_common.h:654
Contains type tags and templates metadata for the various backends.
Definition heffte_backend_cuda.h:178
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
Allows to define whether a specific backend interface has been enabled.
Definition heffte_common.h:226
Type-tag for the Cosine Transform type 1 using the stock FFT backend.
Definition heffte_common.h:140
Type-tag for the Cosine Transform using the stock FFT backend.
Definition heffte_common.h:130
Type-tag for the Sine Transform using the stock FFT backend.
Definition heffte_common.h:135
Type-tag for the stock FFT backend.
Definition heffte_common.h:125
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
static const bool use_reorder
The reshape operations will also reorder the data.
Definition heffte_backend_stock.h:682
static const bool use_reorder
The reshape operations will also reorder the data.
Definition heffte_backend_stock.h:706
static const bool use_reorder
The reshape operations will also reorder the data.
Definition heffte_backend_stock.h:690
static const bool use_reorder
The reshape operations will also reorder the data.
Definition heffte_backend_stock.h:698
Defines a set of default plan options for a given backend.
Definition heffte_common.h:761
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition heffte_utils.h:252
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition heffte_utils.h:270
stock_fft_executor_r2c executor_r2c
Defines the real-to-complex executor.
Definition heffte_backend_stock.h:637
stock_fft_executor executor
Defines the complex-to-complex executor.
Definition heffte_backend_stock.h:635
real2real_executor< backend::stock, cpu_cos1_pre_pos_processor > executor
Defines the real-to-real executor.
Definition heffte_backend_stock.h:671
void executor_r2c
There is no real-to-complex variant.
Definition heffte_backend_stock.h:673
void executor_r2c
There is no real-to-complex variant.
Definition heffte_backend_stock.h:649
real2real_executor< backend::stock, cpu_cos_pre_pos_processor > executor
Defines the real-to-real executor.
Definition heffte_backend_stock.h:647
real2real_executor< backend::stock, cpu_sin_pre_pos_processor > executor
Defines the real-to-real executor.
Definition heffte_backend_stock.h:659
void executor_r2c
There is no real-to-complex variant.
Definition heffte_backend_stock.h:661
Indicates the structure that will be used by the fft backend.
Definition heffte_common.h:663
plan_stock_fft(int size, int howmanyffts, int stride, int dist)
Constructor to plan out an FFT using the stock backend.
Definition heffte_backend_stock.h:399
void execute(std::complex< F > data[])
Execute an FFT inplace on std::complex<F> data.
Definition heffte_backend_stock.h:409
Specialization for r2c single precision.
Definition heffte_backend_stock.h:314
void execute(std::complex< F > const idata[], F odata[])
Execute C2R FFT.
Definition heffte_backend_stock.h:334
void execute(F const idata[], std::complex< F > odata[])
Execute R2C FFT.
Definition heffte_backend_stock.h:359
plan_stock_fft(int size, int howmanyffts, int stride, int rdist, int cdist)
Constructor taking into account the different sizes for the real and complex parts.
Definition heffte_backend_stock.h:324
int N
Identical to the F-complex specialization.
Definition heffte_backend_stock.h:331
Template algorithm for the Sine and Cosine transforms.
Definition heffte_r2r_executor.h:192
Class to represent the call-graph nodes.
Definition heffte_stock_algos.h:78