Highly Efficient FFT for Exascale: HeFFTe v2.4
Loading...
Searching...
No Matches
heffte_backend_mkl.h
1/*
2 -- heFFTe --
3 Univ. of Tennessee, Knoxville
4 @date
5*/
6
7#ifndef HEFFTE_BACKEND_MKL_H
8#define HEFFTE_BACKEND_MKL_H
9
10#include "heffte_r2r_executor.h"
11
12#ifdef Heffte_ENABLE_MKL
13
14#include "mkl_dfti.h"
15
27namespace heffte{
28
29namespace backend{
34 template<> struct is_enabled<mkl> : std::true_type{};
39 template<> struct is_enabled<mkl_cos> : std::true_type{};
44 template<> struct is_enabled<mkl_sin> : std::true_type{};
49 template<> struct default_backend<tag::cpu> {
50 using type = mkl;
51 };
52}
53
58template<> struct is_ccomplex<float _Complex> : std::true_type{};
63template<> struct is_zcomplex<double _Complex> : std::true_type{};
64
69inline void check_error(MKL_LONG status, std::string const &function_name){
70 if (status != 0){
71 throw std::runtime_error(function_name + " failed with status: " + std::to_string(status));
72 }
73}
74
83template<typename scalar_type>
84struct plan_mkl{
93 plan_mkl(int size, int howmanyffts, int stride, int dist) : plan(nullptr){
94 static_assert(std::is_same<scalar_type, std::complex<float>>::value
95 or std::is_same<scalar_type, std::complex<double>>::value,
96 "plan_mkl requires std::complex scalar_type with float or double, see plan_mkl_r2c for real types");
97
98 check_error( DftiCreateDescriptor(&plan, (std::is_same<scalar_type, std::complex<float>>::value) ?
99 DFTI_SINGLE : DFTI_DOUBLE,
100 DFTI_COMPLEX, 1, static_cast<MKL_LONG>(size)), "mkl plan create" );
101 check_error( DftiSetValue(plan, DFTI_NUMBER_OF_TRANSFORMS, static_cast<MKL_LONG>(howmanyffts)), "mkl set howmany");
102 check_error( DftiSetValue(plan, DFTI_PLACEMENT, DFTI_INPLACE), "mkl set in place");
103 MKL_LONG lstride[] = {0, static_cast<MKL_LONG>(stride)};
104 check_error( DftiSetValue(plan, DFTI_INPUT_STRIDES, lstride), "mkl set istride");
105 check_error( DftiSetValue(plan, DFTI_OUTPUT_STRIDES, lstride), "mkl set ostride");
106 check_error( DftiSetValue(plan, DFTI_INPUT_DISTANCE, static_cast<MKL_LONG>(dist)), "mkl set idist");
107 check_error( DftiSetValue(plan, DFTI_OUTPUT_DISTANCE, static_cast<MKL_LONG>(dist)), "mkl set odist");
108 check_error( DftiCommitDescriptor(plan), "mkl commit");
109 }
119 plan_mkl(size_t size1, size_t size2, std::array<MKL_LONG, 2> const &embed, size_t howmanyffts, size_t dist) : plan(nullptr){
120 static_assert(std::is_same<scalar_type, std::complex<float>>::value
121 or std::is_same<scalar_type, std::complex<double>>::value,
122 "plan_mkl requires std::complex scalar_type with float or double, see plan_mkl_r2c for real types");
123
124 MKL_LONG size[] = {static_cast<MKL_LONG>(size1), static_cast<MKL_LONG>(size2)};
125 check_error( DftiCreateDescriptor(&plan, (std::is_same<scalar_type, std::complex<float>>::value) ?
126 DFTI_SINGLE : DFTI_DOUBLE,
127 DFTI_COMPLEX, 2, size), "mkl plan create 2d" );
128 check_error( DftiSetValue(plan, DFTI_NUMBER_OF_TRANSFORMS, static_cast<MKL_LONG>(howmanyffts)), "mkl set howmany");
129 check_error( DftiSetValue(plan, DFTI_PLACEMENT, DFTI_INPLACE), "mkl set in place");
130 MKL_LONG lstride[] = {0, static_cast<MKL_LONG>(embed[0]), static_cast<MKL_LONG>(embed[1])};
131 check_error( DftiSetValue(plan, DFTI_INPUT_STRIDES, lstride), "mkl set istride");
132 check_error( DftiSetValue(plan, DFTI_OUTPUT_STRIDES, lstride), "mkl set ostride");
133 check_error( DftiSetValue(plan, DFTI_INPUT_DISTANCE, static_cast<MKL_LONG>(dist)), "mkl set idist");
134 check_error( DftiSetValue(plan, DFTI_OUTPUT_DISTANCE, static_cast<MKL_LONG>(dist)), "mkl set odist");
135 check_error( DftiCommitDescriptor(plan), "mkl commit");
136
137 }
145 plan_mkl(int size1, int size2, int size3){
146 MKL_LONG size[] = {static_cast<MKL_LONG>(size3), static_cast<MKL_LONG>(size2), static_cast<MKL_LONG>(size1)};
147 check_error( DftiCreateDescriptor(&plan, (std::is_same<scalar_type, std::complex<float>>::value) ?
148 DFTI_SINGLE : DFTI_DOUBLE,
149 DFTI_COMPLEX, 3, size), "mkl plan create 3d" );
150 check_error( DftiSetValue(plan, DFTI_NUMBER_OF_TRANSFORMS, 1), "mkl set howmany");
151 check_error( DftiSetValue(plan, DFTI_PLACEMENT, DFTI_INPLACE), "mkl set in place");
152 check_error( DftiCommitDescriptor(plan), "mkl commit");
153 }
154
156 ~plan_mkl(){ check_error( DftiFreeDescriptor(&plan), "mkl delete descriptor"); }
158 operator DFTI_DESCRIPTOR_HANDLE() const{ return plan; }
160 DFTI_DESCRIPTOR_HANDLE plan;
161};
162
176public:
184 template<typename index>
185 mkl_executor(void*, box3d<index> const box, int dimension) :
186 size(box.size[dimension]), size2(0),
187 howmanyffts(fft1d_get_howmany(box, dimension)),
188 stride(fft1d_get_stride(box, dimension)),
189 dist((dimension == box.order[0]) ? size : 1),
190 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
191 block_stride(box.osize(0) * box.osize(1)),
192 total_size(box.count()),
193 embed({0, 0})
194 {}
196 template<typename index>
197 mkl_executor(void*, box3d<index> const box, int dir1, int dir2) :
198 size(box.size[std::min(dir1, dir2)]), size2(box.size[std::max(dir1, dir2)]),
199 blocks(1), block_stride(0), total_size(box.count())
200 {
201 int odir1 = box.find_order(dir1);
202 int odir2 = box.find_order(dir2);
203
204 if (std::min(odir1, odir2) == 0 and std::max(odir1, odir2) == 1){
205 stride = 1;
206 dist = size * size2;
207 embed = {static_cast<MKL_LONG>(stride), static_cast<MKL_LONG>(size)};
208 howmanyffts = box.size[2];
209 }else if (std::min(odir1, odir2) == 1 and std::max(odir1, odir2) == 2){
210 stride = box.size[0];
211 dist = 1;
212 embed = {static_cast<MKL_LONG>(stride), static_cast<MKL_LONG>(size) * static_cast<MKL_LONG>(stride)};
213 howmanyffts = box.size[0];
214 }else{ // case of directions (0, 2)
215 stride = 1;
216 dist = size;
217 embed = {static_cast<MKL_LONG>(stride), static_cast<MKL_LONG>(box.size[1]) * static_cast<MKL_LONG>(box.size[0])};
218 howmanyffts = box.size[1];
219 }
220 }
222 template<typename index>
223 mkl_executor(void*, box3d<index> const box) :
224 size(box.size[0]), size2(box.size[1]), howmanyffts(box.size[2]),
225 stride(0), dist(0),
226 blocks(1), block_stride(0),
227 total_size(box.count()),
228 embed({0, 0})
229 {}
230
232 void forward(std::complex<float> data[], std::complex<float>*) const override{
233 make_plan(cplan);
234 for(int i=0; i<blocks; i++){
235 float _Complex* block_data = reinterpret_cast<float _Complex*>(data + i * block_stride);
236 DftiComputeForward(*cplan, block_data);
237 }
238 }
240 void backward(std::complex<float> data[], std::complex<float>*) const override{
241 make_plan(cplan);
242 for(int i=0; i<blocks; i++){
243 float _Complex* block_data = reinterpret_cast<float _Complex*>(data + i * block_stride);
244 DftiComputeBackward(*cplan, block_data);
245 }
246 }
248 void forward(std::complex<double> data[], std::complex<double>*) const override{
249 make_plan(zplan);
250 for(int i=0; i<blocks; i++){
251 double _Complex* block_data = reinterpret_cast<double _Complex*>(data + i * block_stride);
252 DftiComputeForward(*zplan, block_data);
253 }
254 }
256 void backward(std::complex<double> data[], std::complex<double>*) const override{
257 make_plan(zplan);
258 for(int i=0; i<blocks; i++){
259 double _Complex* block_data = reinterpret_cast<double _Complex*>(data + i * block_stride);
260 DftiComputeBackward(*zplan, block_data);
261 }
262 }
263
265 void forward(float const indata[], std::complex<float> outdata[], std::complex<float> *workspace) const override{
266 for(int i=0; i<total_size; i++) outdata[i] = std::complex<float>(indata[i]);
267 forward(outdata, workspace);
268 }
270 void backward(std::complex<float> indata[], float outdata[], std::complex<float> *workspace) const override{
271 backward(indata, workspace);
272 for(int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
273 }
275 void forward(double const indata[], std::complex<double> outdata[], std::complex<double> *workspace) const override{
276 for(int i=0; i<total_size; i++) outdata[i] = std::complex<double>(indata[i]);
277 forward(outdata, workspace);
278 }
280 void backward(std::complex<double> indata[], double outdata[], std::complex<double> *workspace) const override{
281 backward(indata, workspace);
282 for(int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
283 }
284
286 int box_size() const override{ return total_size; }
288 size_t workspace_size() const override{ return 0; }
289
290private:
292 template<typename scalar_type>
293 void make_plan(std::unique_ptr<plan_mkl<scalar_type>> &plan) const{
294 if (not plan){
295 if (dist == 0)
296 plan = std::unique_ptr<plan_mkl<scalar_type>>(new plan_mkl<scalar_type>(size, size2, howmanyffts));
297 else if (size2 == 0)
298 plan = std::unique_ptr<plan_mkl<scalar_type>>(new plan_mkl<scalar_type>(size, howmanyffts, stride, dist));
299 else
300 plan = std::unique_ptr<plan_mkl<scalar_type>>(new plan_mkl<scalar_type>(size, size2, embed, howmanyffts, dist));
301 }
302 }
303
304 int size, size2, howmanyffts, stride, dist, blocks, block_stride, total_size;
305 std::array<MKL_LONG, 2> embed;
306 mutable std::unique_ptr<plan_mkl<std::complex<float>>> cplan;
307 mutable std::unique_ptr<plan_mkl<std::complex<double>>> zplan;
308};
309
314template<typename scalar_type, direction dir>
325 plan_mkl_r2c(int size, int howmanyffts, int stride, int rdist, int cdist) : plan(nullptr){
326
327 static_assert(std::is_same<scalar_type, float>::value or std::is_same<scalar_type, double>::value,
328 "plan_mkl_r2c requires scalar_type with float or double, see plan_mkl for complex types");
329
330 check_error( DftiCreateDescriptor(&plan, (std::is_same<scalar_type,float>::value) ?
331 DFTI_SINGLE : DFTI_DOUBLE,
332 DFTI_REAL, 1, static_cast<MKL_LONG>(size)), "mkl create r2c");
333 check_error( DftiSetValue(plan, DFTI_NUMBER_OF_TRANSFORMS, static_cast<MKL_LONG>(howmanyffts)), "mkl set howmany r2c");
334 check_error( DftiSetValue(plan, DFTI_PLACEMENT, DFTI_NOT_INPLACE), "mkl set not in place r2c");
335 check_error( DftiSetValue(plan, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX), "mkl conj storage cc");
336 MKL_LONG lstride[] = {0, static_cast<MKL_LONG>(stride)};
337 check_error( DftiSetValue(plan, DFTI_INPUT_STRIDES, lstride), "mkl set istride r2c");
338 check_error( DftiSetValue(plan, DFTI_OUTPUT_STRIDES, lstride), "mkl set ostride r2c");
339 if (dir == direction::forward){
340 check_error( DftiSetValue(plan, DFTI_INPUT_DISTANCE, static_cast<MKL_LONG>(rdist)), "mkl set rdist r2c");
341 check_error( DftiSetValue(plan, DFTI_OUTPUT_DISTANCE, static_cast<MKL_LONG>(cdist)), "mkl set cdist r2c");
342 }else{
343 check_error( DftiSetValue(plan, DFTI_OUTPUT_DISTANCE, static_cast<MKL_LONG>(rdist)), "mkl set back rdist r2c");
344 check_error( DftiSetValue(plan, DFTI_INPUT_DISTANCE, static_cast<MKL_LONG>(cdist)), "mkl set back cdist r2c");
345 }
346 check_error( DftiCommitDescriptor(plan), "mkl commit r2c");
347 }
348
349
351 ~plan_mkl_r2c(){ check_error( DftiFreeDescriptor(&plan), "mkl free r2c"); }
353 operator DFTI_DESCRIPTOR_HANDLE() const{ return plan; }
355 DFTI_DESCRIPTOR_HANDLE plan;
356};
357
367public:
377 template<typename index>
378 mkl_executor_r2c(void*, box3d<index> const box, int dimension) :
379 size(box.size[dimension]),
380 howmanyffts(fft1d_get_howmany(box, dimension)),
381 stride(fft1d_get_stride(box, dimension)),
382 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
383 rdist((dimension == box.order[0]) ? size : 1),
384 cdist((dimension == box.order[0]) ? size/2 + 1 : 1),
385 rblock_stride(box.osize(0) * box.osize(1)),
386 cblock_stride(box.osize(0) * (box.osize(1)/2 + 1)),
387 rsize(box.count()),
388 csize(box.r2c(dimension).count())
389 {}
390
392 void forward(float const indata[], std::complex<float> outdata[], std::complex<float>*) const override{
393 make_plan(sforward);
394 for(int i=0; i<blocks; i++){
395 float *rdata = const_cast<float*>(indata + i * rblock_stride);
396 float _Complex* cdata = reinterpret_cast<float _Complex*>(outdata + i * cblock_stride);
397 DftiComputeForward(*sforward, rdata, cdata);
398 }
399 }
401 void backward(std::complex<float> indata[], float outdata[], std::complex<float>*) const override{
402 make_plan(sbackward);
403 for(int i=0; i<blocks; i++){
404 float _Complex* cdata = const_cast<float _Complex*>(reinterpret_cast<float _Complex const*>(indata + i * cblock_stride));
405 DftiComputeBackward(*sbackward, cdata, outdata + i * rblock_stride);
406 }
407 }
409 void forward(double const indata[], std::complex<double> outdata[], std::complex<double>*) const override{
410 make_plan(dforward);
411 for(int i=0; i<blocks; i++){
412 double *rdata = const_cast<double*>(indata + i * rblock_stride);
413 double _Complex* cdata = reinterpret_cast<double _Complex*>(outdata + i * cblock_stride);
414 DftiComputeForward(*dforward, rdata, cdata);
415 }
416 }
418 void backward(std::complex<double> indata[], double outdata[], std::complex<double>*) const override{
419 make_plan(dbackward);
420 for(int i=0; i<blocks; i++){
421 double _Complex* cdata = const_cast<double _Complex*>(reinterpret_cast<double _Complex const*>(indata + i * cblock_stride));
422 DftiComputeBackward(*dbackward, cdata, outdata + i * rblock_stride);
423 }
424 }
425
427 int box_size() const override{ return rsize; }
429 int complex_size() const override{ return csize; }
431 size_t workspace_size() const override{ return 0; }
432
433private:
435 template<typename scalar_type, direction dir>
436 void make_plan(std::unique_ptr<plan_mkl_r2c<scalar_type, dir>> &plan) const{
437 if (!plan) plan = std::unique_ptr<plan_mkl_r2c<scalar_type, dir>>(new plan_mkl_r2c<scalar_type, dir>(size, howmanyffts, stride, rdist, cdist));
438 }
439
440 int size, howmanyffts, stride, blocks;
441 int rdist, cdist, rblock_stride, cblock_stride, rsize, csize;
442 mutable std::unique_ptr<plan_mkl_r2c<float, direction::forward>> sforward;
443 mutable std::unique_ptr<plan_mkl_r2c<double, direction::forward>> dforward;
444 mutable std::unique_ptr<plan_mkl_r2c<float, direction::backward>> sbackward;
445 mutable std::unique_ptr<plan_mkl_r2c<double, direction::backward>> dbackward;
446};
447
454template<> struct one_dim_backend<backend::mkl>{
459};
484
489template<> struct default_plan_options<backend::mkl>{
491 static const bool use_reorder = false;
492};
497template<> struct default_plan_options<backend::mkl_cos>{
499 static const bool use_reorder = true;
500};
505template<> struct default_plan_options<backend::mkl_sin>{
507 static const bool use_reorder = true;
508};
509
510}
511
512#endif
513
514#endif /* HEFFTE_BACKEND_MKL_H */
Base class for all backend executors.
Definition heffte_common.h:561
virtual int complex_size() const
Return the size of the complex-box (r2c executors).
Definition heffte_common.h:594
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition heffte_common.h:570
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition heffte_common.h:566
Wrapper to mkl API for real-to-complex transform with shortening of the data.
Definition heffte_backend_mkl.h:366
int box_size() const override
Returns the size of the box with real data.
Definition heffte_backend_mkl.h:427
int complex_size() const override
Returns the size of the box with complex coefficients.
Definition heffte_backend_mkl.h:429
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *) const override
Forward transform, double precision.
Definition heffte_backend_mkl.h:409
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *) const override
Forward transform, single precision.
Definition heffte_backend_mkl.h:392
size_t workspace_size() const override
Return the size of the needed workspace.
Definition heffte_backend_mkl.h:431
mkl_executor_r2c(void *, box3d< index > const box, int dimension)
Constructor defines the box and the dimension of reduction.
Definition heffte_backend_mkl.h:378
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *) const override
Backward transform, double precision.
Definition heffte_backend_mkl.h:418
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *) const override
Backward transform, single precision.
Definition heffte_backend_mkl.h:401
Wrapper around the MKL API.
Definition heffte_backend_mkl.h:175
void forward(std::complex< double > data[], std::complex< double > *) const override
Forward fft, double-complex case.
Definition heffte_backend_mkl.h:248
void forward(std::complex< float > data[], std::complex< float > *) const override
Forward fft, float-complex case.
Definition heffte_backend_mkl.h:232
size_t workspace_size() const override
Return the size of the needed workspace.
Definition heffte_backend_mkl.h:288
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *workspace) const override
Converts the real data to complex and performs double-complex forward transform.
Definition heffte_backend_mkl.h:275
mkl_executor(void *, box3d< index > const box)
Merges three FFTs into one.
Definition heffte_backend_mkl.h:223
int box_size() const override
Returns the size of the box.
Definition heffte_backend_mkl.h:286
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *workspace) const override
Performs backward double-complex transform and truncates the complex part of the result.
Definition heffte_backend_mkl.h:280
mkl_executor(void *, box3d< index > const box, int dir1, int dir2)
Merges two FFTs into one.
Definition heffte_backend_mkl.h:197
mkl_executor(void *, box3d< index > const box, int dimension)
Constructor, specifies the box and dimension.
Definition heffte_backend_mkl.h:185
void backward(std::complex< float > data[], std::complex< float > *) const override
Backward fft, float-complex case.
Definition heffte_backend_mkl.h:240
void backward(std::complex< double > data[], std::complex< double > *) const override
Backward fft, double-complex case.
Definition heffte_backend_mkl.h:256
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *workspace) const override
Converts the real data to complex and performs float-complex forward transform.
Definition heffte_backend_mkl.h:265
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *workspace) const override
Performs backward float-complex transform and truncates the complex part of the result.
Definition heffte_backend_mkl.h:270
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition heffte_geometry.h:169
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
void check_error(MKL_LONG status, std::string const &function_name)
Checks the status of a call to the MKL backend.
Definition heffte_backend_mkl.h:69
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
Defines inverse mapping from the location tag to a default backend tag.
Definition heffte_common.h:430
Allows to define whether a specific backend interface has been enabled.
Definition heffte_common.h:226
Type-tag for the Cosine Transform using the MKL FFT backend.
Definition heffte_common.h:151
Type-tag for the Sine Transform using the MKL FFT backend.
Definition heffte_common.h:156
Type-tag for the MKL backend.
Definition heffte_common.h:146
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition heffte_geometry.h:129
int find_order(int dir) const
Returns the effective order of the direction (dir), 0 -> fast, 1 -> mid, 2 -> slow.
Definition heffte_geometry.h:121
Defines a set of default plan options for a given backend.
Definition heffte_common.h:761
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition heffte_utils.h:252
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition heffte_utils.h:270
void executor_r2c
Defines the real-to-complex executor.
Definition heffte_backend_mkl.h:470
void executor_r2c
Defines the real-to-complex executor.
Definition heffte_backend_mkl.h:482
Indicates the structure that will be used by the fft backend.
Definition heffte_common.h:663
Unlike the C2C plan R2C is non-symmetric and it requires that the direction is built into the plan.
Definition heffte_backend_mkl.h:315
DFTI_DESCRIPTOR_HANDLE plan
Identical to the float-complex specialization.
Definition heffte_backend_mkl.h:355
plan_mkl_r2c(int size, int howmanyffts, int stride, int rdist, int cdist)
Constructor taking into account the different sizes for the real and complex parts.
Definition heffte_backend_mkl.h:325
~plan_mkl_r2c()
Identical to the float-complex specialization.
Definition heffte_backend_mkl.h:351
Base plan for backend::mkl, works only for float and double complex.
Definition heffte_backend_mkl.h:84
plan_mkl(int size, int howmanyffts, int stride, int dist)
Constructor, takes inputs identical to MKL FFT descriptors.
Definition heffte_backend_mkl.h:93
~plan_mkl()
Destructor, deletes the plan.
Definition heffte_backend_mkl.h:156
plan_mkl(size_t size1, size_t size2, std::array< MKL_LONG, 2 > const &embed, size_t howmanyffts, size_t dist)
Constructor, takes inputs identical to MKL FFT descriptors.
Definition heffte_backend_mkl.h:119
DFTI_DESCRIPTOR_HANDLE plan
The MKL opaque structure (pointer to struct).
Definition heffte_backend_mkl.h:160
plan_mkl(int size1, int size2, int size3)
Constructor, takes inputs identical to MKL FFT descriptors.
Definition heffte_backend_mkl.h:145
Template algorithm for the Sine and Cosine transforms.
Definition heffte_r2r_executor.h:192