Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_pack3d.h
/*
    -- heFFTe --
       Univ. of Tennessee, Knoxville
       @date
*/
6
7#ifndef HEFFTE_PACK3D_H
8#define HEFFTE_PACK3D_H
9
10#include "heffte_common.h"
11
24
25namespace heffte {
26
/*!
 * \brief Holds the plan for a pack/unpack operation.
 *
 * The struct is a plain aggregate, the members are listed in the order of declaration.
 */
template<typename index>
struct pack_plan_3d{
    //! \brief Number of elements in the three directions.
    std::array<index, 3> size;
    //! \brief Stride of the lines.
    index line_stride;
    //! \brief Stride of the planes.
    index plane_stride;
    //! \brief Stride of the lines in the received buffer (transpose packing only).
    index buff_line_stride;
    //! \brief Stride of the planes in the received buffer (transpose packing only).
    index buff_plane_stride;
    //! \brief Maps the i,j,k indexes from input to the output (transpose packing only).
    std::array<int, 3> map;
};
46
51template<typename index>
52inline std::ostream & operator << (std::ostream &os, pack_plan_3d<index> const &plan){
53 os << "nfast = " << plan.size[0] << "\n";
54 os << "nmid = " << plan.size[1] << "\n";
55 os << "nslow = " << plan.size[2] << "\n";
56 os << "line_stride = " << plan.line_stride << "\n";
57 os << "plane_stride = " << plan.plane_stride << "\n";
58 if (plan.buff_line_stride > 0){
59 os << "buff_line_stride = " << plan.buff_line_stride << "\n";
60 os << "buff_plane_stride = " << plan.buff_plane_stride << "\n";
61 os << "map = (" << plan.map[0] << ", " << plan.map[1] << ", " << plan.map[2] << ")\n";
62 }
63 os << "\n";
64 return os;
65}
66
/*!
 * \brief The packer needs to know whether the data will be on the CPU or GPU devices.
 *
 * NOTE(review): the declaration line was missing from this listing and the empty body
 * is a reconstruction, confirm against the upstream header.
 */
template<typename backend>
struct packer_backend{};

// template<> struct packer_backend<cuda>{ using mode = tag::gpu; }; // a specialization can differentiate between gpu and cpu backends
78
/*!
 * \brief Defines the direct packer without implementation, use the specializations to get an actual implementation.
 */
template<typename mode> struct direct_packer{};
84
89template<> struct direct_packer<tag::cpu>{
91 template<typename scalar_type, typename index>
92 void pack(void*, pack_plan_3d<index> const &plan, scalar_type const data[], scalar_type buffer[]) const{
93 scalar_type* buffer_iterator = buffer;
94 for(index slow = 0; slow < plan.size[2]; slow++){
95 for(index mid = 0; mid < plan.size[1]; mid++){
96 buffer_iterator = std::copy_n(&data[slow * plan.plane_stride + mid * plan.line_stride], plan.size[0], buffer_iterator);
97 }
98 }
99 }
100
101 template<typename scalar_type, typename index>
102 void unpack(void*, pack_plan_3d<index> const &plan, scalar_type const buffer[], scalar_type data[]) const{
103 for(index slow = 0; slow < plan.size[2]; slow++){
104 for(index mid = 0; mid < plan.size[1]; mid++){
105 std::copy_n(&buffer[(slow * plan.size[1] + mid) * plan.size[0]],
106 plan.size[0], &data[slow * plan.plane_stride + mid * plan.line_stride]);
107 }
108 }
109 }
110};
111
/*!
 * \brief Defines the transpose packer without implementation, use the specializations to get an actual implementation.
 */
template<typename mode> struct transpose_packer{};
117
122template<> struct transpose_packer<tag::cpu>{
124 template<typename scalar_type, typename index>
125 void pack(void *q, pack_plan_3d<index> const &plan, scalar_type const data[], scalar_type buffer[]) const{
126 direct_packer<tag::cpu>().pack(q, plan, data, buffer); // packing is done the same way as the direct_packer
127 }
128
134 template<typename scalar_type, typename index>
135 void unpack(void*, pack_plan_3d<index> const &plan, scalar_type const buffer[], scalar_type data[]) const{
136 constexpr index stride = 256 / sizeof(scalar_type);
137 if (plan.map[0] == 0 and plan.map[1] == 1){
138 for(index i=0; i<plan.size[2]; i++)
139 for(index j=0; j<plan.size[1]; j++)
140 for(index k=0; k<plan.size[0]; k++)
141 data[i * plan.plane_stride + j * plan.line_stride + k]
142 = buffer[ i * plan.buff_plane_stride + j * plan.buff_line_stride + k ];
143
144 }else if (plan.map[0] == 0 and plan.map[1] == 2){
145 for(index bi=0; bi<plan.size[2]; bi+=stride)
146 for(index bj=0; bj<plan.size[1]; bj+=stride)
147 for(index bk=0; bk<plan.size[0]; bk+=stride)
148 for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)
149 for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)
150 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)
151 data[i * plan.plane_stride + j * plan.line_stride + k]
152 = buffer[ j * plan.buff_plane_stride + i * plan.buff_line_stride + k ];
153
154 }else if (plan.map[0] == 1 and plan.map[1] == 0){
155 for(index bi=0; bi<plan.size[2]; bi+=stride)
156 for(index bj=0; bj<plan.size[1]; bj+=stride)
157 for(index bk=0; bk<plan.size[0]; bk+=stride)
158 for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)
159 for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)
160 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)
161 data[i * plan.plane_stride + j * plan.line_stride + k]
162 = buffer[ i * plan.buff_plane_stride + k * plan.buff_line_stride + j ];
163
164 }else if (plan.map[0] == 1 and plan.map[1] == 2){
165 for(index bi=0; bi<plan.size[2]; bi+=stride)
166 for(index bj=0; bj<plan.size[1]; bj+=stride)
167 for(index bk=0; bk<plan.size[0]; bk+=stride)
168 for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)
169 for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)
170 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)
171 data[i * plan.plane_stride + j * plan.line_stride + k]
172 = buffer[ k * plan.buff_plane_stride + i * plan.buff_line_stride + j ];
173
174 }else if (plan.map[0] == 2 and plan.map[1] == 0){
175 for(index bi=0; bi<plan.size[2]; bi+=stride)
176 for(index bj=0; bj<plan.size[1]; bj+=stride)
177 for(index bk=0; bk<plan.size[0]; bk+=stride)
178 for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)
179 for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)
180 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)
181 data[i * plan.plane_stride + j * plan.line_stride + k]
182 = buffer[ j * plan.buff_plane_stride + k * plan.buff_line_stride + i ];
183
184 }else{ // if (plan.map[0] == 2 and plan.map[1] == 1){
185 for(index bi=0; bi<plan.size[2]; bi+=stride)
186 for(index bj=0; bj<plan.size[1]; bj+=stride)
187 for(index bk=0; bk<plan.size[0]; bk+=stride)
188 for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)
189 for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)
190 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)
191 data[i * plan.plane_stride + j * plan.line_stride + k]
192 = buffer[ k * plan.buff_plane_stride + j * plan.buff_line_stride + i ];
193
194 }
195
196 }
197};
198
/*!
 * \brief Methods that multiply the entries of an array by a constant factor using a simple loop.
 */
namespace data_scaling {
    /*!
     * \brief Simply multiply the \b num_entries in the \b data by the \b scale_factor.
     *
     * The first (void pointer) parameter is not used.
     */
    template<typename scalar_type, typename index>
    void apply(void*, index num_entries, scalar_type *data, double scale_factor){
        for(index i=0; i<num_entries; i++) data[i] *= scale_factor;
    }

    /*!
     * \brief Complex by real scaling.
     *
     * The array of \b num_entries complex numbers is treated as an array of
     * 2 * num_entries real numbers and is passed to the real variant of apply(),
     * the \b stream is forwarded unchanged.
     */
    template<typename precision_type, typename index>
    void apply(void *stream, index num_entries, std::complex<precision_type> *data, double scale_factor){
        // the explicit template argument selects the real-valued overload defined above
        apply<precision_type>(stream, 2*num_entries, reinterpret_cast<precision_type*>(data), scale_factor);
    }

    /*!
     * \brief Variant without a stream, calls the four argument overloads with a null pointer.
     */
    template<typename scalar_type, typename index>
    void apply(index num_entries, scalar_type *data, double scale_factor){
        apply(nullptr, num_entries, data, scale_factor);
    }
}
234
235}
236
237#endif
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition heffte_geometry.h:146
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition heffte_backend_cuda.h:837
Contains internal type-tags.
Definition heffte_common.h:30
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
void pack(void *, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition heffte_pack3d.h:92
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition heffte_pack3d.h:102
Defines the direct packer without implementation, use the specializations to get the CPU or GPU implementation.
Definition heffte_pack3d.h:83
Holds the plan for a pack/unpack operation.
Definition heffte_pack3d.h:32
index buff_plane_stride
Stride of the planes in the received buffer (transpose packing only).
Definition heffte_pack3d.h:42
index line_stride
Stride of the lines.
Definition heffte_pack3d.h:36
index plane_stride
Stride of the planes.
Definition heffte_pack3d.h:38
std::array< index, 3 > size
Number of elements in the three directions.
Definition heffte_pack3d.h:34
std::array< int, 3 > map
Maps the i,j,k indexes from input to the output (transpose packing only).
Definition heffte_pack3d.h:44
index buff_line_stride
Stride of the lines in the received buffer (transpose packing only).
Definition heffte_pack3d.h:40
The packer needs to know whether the data will be on the CPU or GPU devices.
Definition heffte_pack3d.h:75
void pack(void *q, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition heffte_pack3d.h:125
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition heffte_pack3d.h:135
Defines the transpose packer without implementation, use the specializations to get the CPU implementation.
Definition heffte_pack3d.h:116