Actual source code: cufft.cu
2: /*
3: Provides an interface to the CUFFT package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <petscdevice.h>
8: #include <petsc/private/matimpl.h>
10: typedef struct {
11: PetscInt ndim;
12: PetscInt *dim;
13: cufftHandle p_forward, p_backward;
14: cufftComplex *devArray;
15: } Mat_CUFFT;
17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18: {
19: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
20: cufftComplex *devArray = cufft->devArray;
21: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
22: PetscScalar *x_array, *y_array;
24: VecGetArray(x, &x_array);
25: VecGetArray(y, &y_array);
26: if (!cufft->p_forward) {
27: /* create a plan, then execute it */
28: switch (ndim) {
29: case 1:
30: cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);
31: break;
32: case 2:
33: cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);
34: break;
35: case 3:
36: cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);
37: break;
38: default:
39: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
40: }
41: }
42: /* transfer to GPU memory */
43: cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
44: /* execute transform */
45: cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);
46: /* transfer from GPU memory */
47: cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
48: VecRestoreArray(y, &y_array);
49: VecRestoreArray(x, &x_array);
50: return 0;
51: }
53: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
54: {
55: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
56: cufftComplex *devArray = cufft->devArray;
57: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
58: PetscScalar *x_array, *y_array;
60: VecGetArray(x, &x_array);
61: VecGetArray(y, &y_array);
62: if (!cufft->p_backward) {
63: /* create a plan, then execute it */
64: switch (ndim) {
65: case 1:
66: cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);
67: break;
68: case 2:
69: cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);
70: break;
71: case 3:
72: cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);
73: break;
74: default:
75: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
76: }
77: }
78: /* transfer to GPU memory */
79: cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
80: /* execute transform */
81: cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);
82: /* transfer from GPU memory */
83: cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
84: VecRestoreArray(y, &y_array);
85: VecRestoreArray(x, &x_array);
86: return 0;
87: }
89: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
90: {
91: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
93: PetscFree(cufft->dim);
94: if (cufft->p_forward) cufftDestroy(cufft->p_forward);
95: if (cufft->p_backward) cufftDestroy(cufft->p_backward);
96: cudaFree(cufft->devArray);
97: PetscFree(A->data);
98: PetscObjectChangeTypeName((PetscObject)A,0);
99: return 0;
100: }
102: /*@
103: MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
105: Collective
107: Input Parameters:
108: + comm - MPI communicator, set to PETSC_COMM_SELF
109: . ndim - the ndim-dimensional transform
110: - dim - array of size ndim, dim[i] contains the vector length in the i-dimension
112: Output Parameter:
113: . A - the matrix
115: Options Database Keys:
116: . -mat_cufft_plannerflags - set CUFFT planner flags
118: Level: intermediate
119: @*/
120: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
121: {
122: Mat_CUFFT *cufft;
123: PetscInt m = 1;
128: MatCreate(comm, A);
129: for (PetscInt d = 0; d < ndim; ++d) {
131: m *= dim[d];
132: }
133: MatSetSizes(*A, m, m, m, m);
134: PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);
136: PetscNewLog(*A,&cufft);
137: (*A)->data = (void*) cufft;
138: PetscMalloc1(ndim+1, &cufft->dim);
139: PetscArraycpy(cufft->dim, dim, ndim);
141: cufft->ndim = ndim;
142: cufft->p_forward = 0;
143: cufft->p_backward = 0;
144: cufft->dim[ndim] = m;
146: /* GPU memory allocation */
147: cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);
149: (*A)->ops->mult = MatMult_SeqCUFFT;
150: (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
151: (*A)->assembled = PETSC_TRUE;
152: (*A)->ops->destroy = MatDestroy_SeqCUFFT;
154: /* get runtime options ...what options????? */
155: {
158: PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
159: PetscOptionsEnd();
160: }
161: return 0;
162: }