7#ifndef HEFFFTE_COS_EXECUTOR_H
8#define HEFFFTE_COS_EXECUTOR_H
10#include "heffte_pack3d.h"
31template<
typename prepos_processor,
typename index>
33 std::array<index, 3> high{box.
size[0]-1, box.
size[1]-1, box.
size[2]-1};
34 high[box.
order[0]] = prepos_processor::compute_extended_length(box.
osize(0)) - 1;
45 template<
typename precision>
46 static void pre_forward(
void*,
int length, precision
const input[], precision fft_signal[]){
47 for(
int i = 0; i < length; i++){
49 fft_signal[2*i+1] = input[i];
51 fft_signal[2*length] = 0;
52 for(
int i = 0; i < 2*length; i++){
53 fft_signal[4*length-i] = fft_signal[i];
57 template<
typename precision>
58 static void post_forward(
void*,
int length, std::complex<precision>
const fft_result[], precision result[]){
59 for(
int i = 0; i < length; i++){
60 result[i] = std::real(fft_result[i]);
64 template<
typename precision>
65 static void pre_backward(
void*,
int length, precision
const input[], std::complex<precision> fft_signal[]){
66 for(
int i = 0; i < length; i++){
67 fft_signal[i] = std::complex<precision>(input[i]);
69 fft_signal[length] = 0.0;
72 for(
int i = length+1; i < 2*length+1; i++){
73 fft_signal[i] = std::complex<precision>(-1.0 * input[index]);
78 template<
typename precision>
79 static void post_backward(
void*,
int length, precision
const fft_result[], precision result[]){
80 for(
int i=0; i<length; i++)
81 result[i] = fft_result[2*i + 1];
95 template<
typename precision>
96 static void pre_forward(
void*,
int length, precision
const input[], precision fft_signal[]){
97 for(
int i=0; i<length; i++){
98 fft_signal[2*i] = 0.0;
99 fft_signal[2*i+1] = input[i];
101 fft_signal[2*length] = 0.;
102 for(
int i=0; i<length; i++){
103 fft_signal[4*length-2*i] = 0.0;
104 fft_signal[4*length-2*i-1]= -input[i];
108 template<
typename precision>
109 static void post_forward(
void*,
int length, std::complex<precision>
const fft_result[], precision result[]){
110 for(
int i=0; i < length; i++)
111 result[i] = -std::imag(fft_result[i+1]);
114 template<
typename precision>
115 static void pre_backward(
void*,
int length, precision
const input[], std::complex<precision> fft_signal[]){
116 fft_signal[0] = std::complex<precision>(0.0);
117 for(
int i=0; i < length; i++){
118 fft_signal[i+1] = std::complex<precision>(0.0, -input[i]);
120 fft_signal[2*length] = std::complex<precision>(0.0);
121 for(
int i=0; i < length-1; i++){
122 fft_signal[length + i + 1] = std::complex<precision>(0.0, -input[length - i - 2]);
126 template<
typename precision>
127 static void post_backward(
void*,
int length, precision
const fft_result[], precision result[]){
142 template<
typename precision>
143 static void pre_forward(
void*,
int length, precision
const input[], precision fft_signal[]){
144 for (
int i = 0; i < length-1; i++){
145 fft_signal[2*i] = input[i];
146 fft_signal[2*i+1] = 0.0;
148 for (
int i = 1; i < length; i++){
149 fft_signal[4*(length-1)-2*i] = input[i];
150 fft_signal[4*(length-1)-2*i+1] = 0.0;
154 template<
typename precision>
155 static void post_forward(
void*,
int length, std::complex<precision>
const fft_result[], precision result[]){
159 template<
typename precision>
160 static void pre_backward(
void*,
int length, precision
const input[], std::complex<precision> fft_signal[]){
161 for (
int i = 0; i < length; i++){
162 fft_signal[i] = std::complex<precision>(input[i], 0.0);
164 int index = length-2;
165 for (
int i = length; i < 2*length-1; i++){
166 fft_signal[i] = std::complex<precision>(input[index], 0.0);
171 template<
typename precision>
172 static void post_backward(
void*,
int length, precision
const fft_result[], precision result[]){
173 for(
int i=0; i<length; i++)
174 result[i] = fft_result[2*i];
178 return 4 * ( length-1 );
191template<
typename fft_backend_tag,
typename prepost_processor>
199 template<
typename index>
202 length(box.osize(0)),
203 extended_length(prepost_processor::compute_extended_length(box.osize(0))),
204 num_batch(box.osize(1) * box.osize(2)),
205 total_size(box.count()),
206 fft(make_executor_r2c<fft_backend_tag>(stream,
make_extended_box<prepost_processor>(box), dimension))
208 assert(dimension == box.
order[0]);
211 template<
typename index>
213 {
throw std::runtime_error(
"2D real-to-real transform is not yet implemented!"); }
215 template<
typename index>
217 {
throw std::runtime_error(
"3D real-to-real transform is not yet implemented!"); }
220 template<
typename scalar_type>
221 void forward(scalar_type data[], scalar_type workspace[])
const{
223 std::complex<scalar_type>* ctemp =
align<fft_backend_tag>::pntr(
reinterpret_cast<std::complex<scalar_type>*
>(workspace + fft->box_size() + 1));
224 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
226 for(
int i=0; i<num_batch; i++){
227 prepost_processor::pre_forward(stream, length, data + i * length, temp + i * extended_length );
229 fft->forward(temp, ctemp, fft_work);
230 for(
int i=0; i<num_batch; i++)
231 prepost_processor::post_forward(stream, length, ctemp + i * ( extended_length/2 + 1 ), data + i * length);
234 template<
typename scalar_type>
235 void backward(scalar_type data[], scalar_type workspace[])
const{
237 std::complex<scalar_type>* ctemp =
align<fft_backend_tag>::pntr(
reinterpret_cast<std::complex<scalar_type>*
>(workspace + fft->box_size() + 1));
238 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
240 for(
int i=0; i<num_batch; i++)
241 prepost_processor::pre_backward(stream, length, data + i * length, ctemp + i * ( extended_length/2 + 1));
242 fft->backward(ctemp, temp, fft_work);
243 for(
int i=0; i<num_batch; i++)
244 prepost_processor::post_backward(stream, length, temp + i * extended_length, data + i * length);
248 template<
typename precision>
249 void forward(precision
const[], std::complex<precision>[])
const{
250 throw std::runtime_error(
"Calling cos-transform with real-to-complex data! This should not happen!");
253 template<
typename precision>
254 void backward(std::complex<precision> indata[], precision outdata[])
const{
forward(outdata, indata); }
257 int box_size()
const override{
return total_size; }
260 return fft->box_size() + 2 * fft->complex_size() + 2 * fft->workspace_size()
261 + ((std::is_same<fft_backend_tag, backend::cufft>::value) ? 2 : 1 );
275 int length, extended_length, num_batch, total_size;
277 std::unique_ptr<typename one_dim_backend<fft_backend_tag>::executor_r2c> fft;
Base class for all backend executors.
Definition heffte_common.h:561
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition heffte_common.h:570
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition heffte_common.h:566
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
box3d< index > make_extended_box(box3d< index > const &box)
Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
Definition heffte_r2r_executor.h:32
Namespace containing all HeFFTe methods and classes.
Definition heffte_backend_cuda.h:38
static float * pntr(float *p)
Align for float.
Definition heffte_common.h:604
Holds the auxiliary variables needed by each backend.
Definition heffte_common.h:408
A generic container that describes a 3d box of indexes.
Definition heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition heffte_geometry.h:129
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition heffte_geometry.h:131
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition heffte_geometry.h:123
Pre/Post processing for the Cosine transform type I using the CPU.
Definition heffte_r2r_executor.h:140
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:143
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:155
static int compute_extended_length(int length)
Computes the length of the extended signal.
Definition heffte_r2r_executor.h:177
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:172
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:160
Pre/Post processing for the Cosine transform using the CPU.
Definition heffte_r2r_executor.h:43
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:58
static int compute_extended_length(int length)
Computes the length of the extended FFT signal.
Definition heffte_r2r_executor.h:84
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:46
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:79
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:65
Definition heffte_r2r_executor.h:182
Pre/Post processing for the Sine transform using the CPU.
Definition heffte_r2r_executor.h:93
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition heffte_r2r_executor.h:109
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition heffte_r2r_executor.h:115
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition heffte_r2r_executor.h:127
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition heffte_r2r_executor.h:96
static int compute_extended_length(int length)
Computes the length of the extended FFT signal.
Definition heffte_r2r_executor.h:131
Template algorithm for the Sine and Cosine transforms.
Definition heffte_r2r_executor.h:192
int box_size() const override
Returns the size of the box.
Definition heffte_r2r_executor.h:257
size_t workspace_size() const override
Returns the size of the box.
Definition heffte_r2r_executor.h:259
void forward(scalar_type data[], scalar_type workspace[]) const
Forward transform.
Definition heffte_r2r_executor.h:221
virtual void forward(double data[], double *workspace) const override
Forward r2r, double precision.
Definition heffte_r2r_executor.h:266
void backward(scalar_type data[], scalar_type workspace[]) const
Inverse transform.
Definition heffte_r2r_executor.h:235
virtual void backward(double data[], double *workspace) const override
Backward r2r, double precision.
Definition heffte_r2r_executor.h:270
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const box, int dimension)
Construct a plan for batch 1D transforms.
Definition heffte_r2r_executor.h:200
void backward(std::complex< precision > indata[], precision outdata[]) const
Placeholder for template type consistency, should never be called.
Definition heffte_r2r_executor.h:254
virtual void backward(float data[], float *workspace) const override
Backward r2r, single precision.
Definition heffte_r2r_executor.h:268
void forward(precision const[], std::complex< precision >[]) const
Placeholder for template type consistency, should never be called.
Definition heffte_r2r_executor.h:249
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const, int, int)
Construct a plan for batch 2D transforms, not implemented currently.
Definition heffte_r2r_executor.h:212
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const)
Construct a plan for a single 3D transform, not implemented currently.
Definition heffte_r2r_executor.h:216
virtual void forward(float data[], float *workspace) const override
Forward r2r, single precision.
Definition heffte_r2r_executor.h:264
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition heffte_common.h:38