Actual source code: mpikok.kokkos.cxx


  2: /*
  3:    This file contains routines for Parallel vector operations.
  4:  */

  6: #include <petscvec_kokkos.hpp>
  7: #include <petsc/private/vecimpl.h>
  8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>

 11: PetscErrorCode VecDestroy_MPIKokkos(Vec v)
 12: {
 13:   Vec_Kokkos     *veckok = static_cast<Vec_Kokkos*>(v->spptr);

 15:   delete veckok;
 16:   VecDestroy_MPI(v);
 17:   return 0;
 18: }

 20: PetscErrorCode VecNorm_MPIKokkos(Vec xin,NormType type,PetscReal *z)
 21: {
 22:   PetscReal      sum,work = 0.0;

 24:   if (type == NORM_2 || type == NORM_FROBENIUS) {
 25:     VecNorm_SeqKokkos(xin,NORM_2,&work);
 26:     work *= work;
 27:     MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 28:     *z    = PetscSqrtReal(sum);
 29:   } else if (type == NORM_1) {
 30:     /* Find the local part */
 31:     VecNorm_SeqKokkos(xin,NORM_1,&work);
 32:     /* Find the global max */
 33:     MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 34:   } else if (type == NORM_INFINITY) {
 35:     /* Find the local max */
 36:     VecNorm_SeqKokkos(xin,NORM_INFINITY,&work);
 37:     /* Find the global max */
 38:     MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
 39:   } else if (type == NORM_1_AND_2) {
 40:     PetscReal temp[2];
 41:     VecNorm_SeqKokkos(xin,NORM_1,temp);
 42:     VecNorm_SeqKokkos(xin,NORM_2,temp+1);
 43:     temp[1] = temp[1]*temp[1];
 44:     MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 45:     z[1] = PetscSqrtReal(z[1]);
 46:   }
 47:   return 0;
 48: }

 50: /* z = y^H x */
 51: PetscErrorCode VecDot_MPIKokkos(Vec xin,Vec yin,PetscScalar *z)
 52: {
 53:   PetscScalar    sum,work;

 55:   VecDot_SeqKokkos(xin,yin,&work);
 56:   MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 57:   *z   = sum;
 58:   return 0;
 59: }

 61: PetscErrorCode VecMDot_MPIKokkos(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
 62: {
 63:   PetscScalar    awork[128],*work = awork;

 65:   if (nv > 128) PetscMalloc1(nv,&work);
 66:   VecMDot_SeqKokkos(xin,nv,y,work);
 67:   MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 68:   if (nv > 128) PetscFree(work);
 69:   return 0;
 70: }

 72: /* z = y^T x */
 73: PetscErrorCode VecTDot_MPIKokkos(Vec xin,Vec yin,PetscScalar *z)
 74: {
 75:   PetscScalar    sum,work;

 77:   VecTDot_SeqKokkos(xin,yin,&work);
 78:   MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 79:   *z   = sum;
 80:   return 0;
 81: }

 83: PetscErrorCode VecMTDot_MPIKokkos(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
 84: {
 85:   PetscScalar    awork[128],*work = awork;

 87:   if (nv > 128) PetscMalloc1(nv,&work);
 88:   VecMTDot_SeqKokkos(xin,nv,y,work);
 89:   MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
 90:   if (nv > 128) PetscFree(work);
 91:   return 0;
 92: }

 94: PetscErrorCode VecMax_MPIKokkos(Vec xin,PetscInt *idx,PetscReal *z)
 95: {
 96:   PetscReal      work;

 98:   /* Find the local max */
 99:   VecMax_SeqKokkos(xin,idx,&work);
100: #if defined(PETSC_HAVE_MPIUNI)
101:   *z = work;
102: #else
103:   /* Find the global max */
104:   if (!idx) { /* User does not need idx */
105:     MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
106:   } else {
107:     struct { PetscReal v; PetscInt i; } in,out;

109:     in.v  = work;
110:     in.i  = *idx + xin->map->rstart;
111:     MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MAXLOC,PetscObjectComm((PetscObject)xin));
112:     *z    = out.v;
113:     *idx  = out.i;
114:   }
115: #endif
116:   return 0;
117: }

119: PetscErrorCode VecMin_MPIKokkos(Vec xin,PetscInt *idx,PetscReal *z)
120: {
121:   PetscReal      work;

123:   /* Find the local Min */
124:   VecMin_SeqKokkos(xin,idx,&work);
125: #if defined(PETSC_HAVE_MPIUNI)
126:   *z = work;
127: #else
128:   /* Find the global Min */
129:   if (!idx) {
130:     MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)xin));
131:   } else {
132:     struct { PetscReal v; PetscInt i; } in,out;

134:     in.v  = work;
135:     in.i  = *idx + xin->map->rstart;
136:     MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MINLOC,PetscObjectComm((PetscObject)xin));
137:     *z    = out.v;
138:     *idx  = out.i;
139:   }
140: #endif
141:   return 0;
142: }

144: PetscErrorCode VecDuplicate_MPIKokkos(Vec win,Vec *vv)
145: {
146:   Vec            v;
147:   Vec_MPI        *vecmpi;
148:   Vec_Kokkos     *veckok;

150:   /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
151:   VecDuplicate_MPI(win,&v); /* after the call, v is a VECMPI, with data zero'ed */
152:   PetscObjectChangeTypeName((PetscObject)v,VECMPIKOKKOS);
153:   PetscMemcpy(v->ops,win->ops,sizeof(struct _VecOps));

155:   /* Build the Vec_Kokkos struct */
156:   vecmpi = static_cast<Vec_MPI*>(v->data);
157:   veckok = new Vec_Kokkos(v->map->n,vecmpi->array);
158:   Kokkos::deep_copy(veckok->v_dual.view_device(),0.0);
159:   v->spptr       = veckok;
160:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
161:   *vv = v;
162:   return 0;
163: }

165: PetscErrorCode VecDotNorm2_MPIKokkos(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
166: {
167:   PetscScalar    work[2],sum[2];

169:   VecDotNorm2_SeqKokkos(s,t,work,work+1);
170:   MPIU_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
171:   *dp  = sum[0];
172:   *nm  = sum[1];
173:   return 0;
174: }

176: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x,IS is,Vec *y)
177: {
178:   VecGetSubVector_Kokkos_Private(x,PETSC_TRUE,is,y);
179:   return 0;
180: }

182: static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
183: {
184:   v->ops->abs                    = VecAbs_SeqKokkos;
185:   v->ops->reciprocal             = VecReciprocal_SeqKokkos;
186:   v->ops->pointwisemult          = VecPointwiseMult_SeqKokkos;
187:   v->ops->setrandom              = VecSetRandom_SeqKokkos;
188:   v->ops->dotnorm2               = VecDotNorm2_MPIKokkos;
189:   v->ops->waxpy                  = VecWAXPY_SeqKokkos;
190:   v->ops->norm                   = VecNorm_MPIKokkos;
191:   v->ops->min                    = VecMin_MPIKokkos;
192:   v->ops->max                    = VecMax_MPIKokkos;
193:   v->ops->sum                    = VecSum_SeqKokkos;
194:   v->ops->shift                  = VecShift_SeqKokkos;
195:   v->ops->scale                  = VecScale_SeqKokkos;
196:   v->ops->copy                   = VecCopy_SeqKokkos;
197:   v->ops->set                    = VecSet_SeqKokkos;
198:   v->ops->swap                   = VecSwap_SeqKokkos;
199:   v->ops->axpy                   = VecAXPY_SeqKokkos;
200:   v->ops->axpby                  = VecAXPBY_SeqKokkos;
201:   v->ops->maxpy                  = VecMAXPY_SeqKokkos;
202:   v->ops->aypx                   = VecAYPX_SeqKokkos;
203:   v->ops->axpbypcz               = VecAXPBYPCZ_SeqKokkos;
204:   v->ops->pointwisedivide        = VecPointwiseDivide_SeqKokkos;
205:   v->ops->placearray             = VecPlaceArray_SeqKokkos;
206:   v->ops->replacearray           = VecReplaceArray_SeqKokkos;
207:   v->ops->resetarray             = VecResetArray_SeqKokkos;

209:   v->ops->dot                    = VecDot_MPIKokkos;
210:   v->ops->tdot                   = VecTDot_MPIKokkos;
211:   v->ops->mdot                   = VecMDot_MPIKokkos;
212:   v->ops->mtdot                  = VecMTDot_MPIKokkos;

214:   v->ops->dot_local              = VecDot_SeqKokkos;
215:   v->ops->tdot_local             = VecTDot_SeqKokkos;
216:   v->ops->mdot_local             = VecMDot_SeqKokkos;
217:   v->ops->mtdot_local            = VecMTDot_SeqKokkos;

219:   v->ops->norm_local             = VecNorm_SeqKokkos;
220:   v->ops->duplicate              = VecDuplicate_MPIKokkos;
221:   v->ops->destroy                = VecDestroy_MPIKokkos;
222:   v->ops->getlocalvector         = VecGetLocalVector_SeqKokkos;
223:   v->ops->restorelocalvector     = VecRestoreLocalVector_SeqKokkos;
224:   v->ops->getlocalvectorread     = VecGetLocalVector_SeqKokkos;
225:   v->ops->restorelocalvectorread = VecRestoreLocalVector_SeqKokkos;
226:   v->ops->getarraywrite          = VecGetArrayWrite_SeqKokkos;
227:   v->ops->getarray               = VecGetArray_SeqKokkos;
228:   v->ops->restorearray           = VecRestoreArray_SeqKokkos;
229:   v->ops->getarrayandmemtype     = VecGetArrayAndMemType_SeqKokkos;
230:   v->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqKokkos;
231:   v->ops->getarraywriteandmemtype= VecGetArrayWriteAndMemType_SeqKokkos;
232:   v->ops->getsubvector           = VecGetSubVector_MPIKokkos;
233:   v->ops->restoresubvector       = VecRestoreSubVector_SeqKokkos;
234:   return 0;
235: }

237: /*MC
238:    VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

240:    Options Database Keys:
241: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

243:   Level: beginner

245: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIKokkosWithArray(), VECMPI, VecType, VecCreateMPI()
246: M*/
247: PetscErrorCode VecCreate_MPIKokkos(Vec v)
248: {
249:   Vec_MPI    *vecmpi;
250:   Vec_Kokkos *veckok;

252:   PetscKokkosInitializeCheck();
253:   PetscLayoutSetUp(v->map);
254:   VecCreate_MPI(v);  /* Calloc host array */

256:   vecmpi = static_cast<Vec_MPI*>(v->data);
257:   PetscObjectChangeTypeName((PetscObject)v,VECMPIKOKKOS);
258:   VecSetOps_MPIKokkos(v);
259:   veckok = new Vec_Kokkos(v->map->n,vecmpi->array,NULL); /* Alloc device array but do not init it */
260:   v->spptr = static_cast<void*>(veckok);
261:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
262:   return 0;
263: }

265: /*@C
266:    VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
267:    where the user provides the GPU array space to store the vector values.

269:    Collective

271:    Input Parameters:
272: +  comm  - the MPI communicator to use
273: .  bs    - block size, same meaning as VecSetBlockSize()
274: .  n     - local vector length, cannot be PETSC_DECIDE
275: .  N     - global vector length (or PETSC_DECIDE to have calculated)
276: -  array - the user provided GPU array to store the vector values

278:    Output Parameter:
279: .  vv - the vector

281:    Notes:
282:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
283:    same type as an existing vector.

285:    If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
286:    at a later stage to SET the array for storing the vector values.

288:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
289:    The user should not free the array until the vector is destroyed.

291:    Level: intermediate

293: .seealso: VecCreateSeqKokkosWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
294:           VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
295:           VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()

297: @*/
298: PetscErrorCode  VecCreateMPIKokkosWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar darray[],Vec *v)
299: {
300:   Vec            w;
301:   Vec_Kokkos     *veckok;
302:   Vec_MPI        *vecmpi;
303:   PetscScalar    *harray;

306:   PetscKokkosInitializeCheck();
307:   PetscSplitOwnership(comm,&n,&N);
308:   VecCreate(comm,&w);
309:   VecSetSizes(w,n,N);
310:   VecSetBlockSize(w,bs);
311:   PetscLayoutSetUp(w->map);

313:   if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {harray = const_cast<PetscScalar*>(darray);}
314:   else PetscMalloc1(w->map->n,&harray); /* If device is not the same as host, allocate the host array ourselves */

316:   VecCreate_MPI_Private(w,PETSC_FALSE/*alloc*/,0/*nghost*/,harray); /* Build a sequential vector with provided data */
317:   vecmpi = static_cast<Vec_MPI*>(w->data);

319:   if (!std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */

321:   PetscObjectChangeTypeName((PetscObject)w,VECMPIKOKKOS);
322:   VecSetOps_MPIKokkos(w);
323:   veckok = new Vec_Kokkos(n,harray,const_cast<PetscScalar*>(darray));
324:   veckok->v_dual.modify_device(); /* Mark the device is modified */
325:   w->spptr = static_cast<void*>(veckok);
326:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
327:   *v = w;
328:   return 0;
329: }

331: /*
332:    VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
333:    with user-provided arrays on host and device.

335:    Collective

337:    Input Parameter:
338: +  comm - the communicator
339: .  bs - the block size
340: .  n - the local vector length
341: .  N - the global vector length
342: -  harray - host memory where the vector elements are to be stored.
343: -  darray - device memory where the vector elements are to be stored.

345:    Output Parameter:
346: .  v - the vector

348:    Notes:
349:    If there is no device, then harray and darray must be the same.
350:    If n is not zero, then harray and darray must be allocated.
351:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
352:    we suppose harray and darray have the same data.

354:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
355:    The user should not free the array until the vector is destroyed.
356: */
357: PetscErrorCode  VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar harray[],const PetscScalar darray[],Vec *v)
358: {
359:   Vec w;

361:   PetscKokkosInitializeCheck();
362:   if (n) {
365:   }
367:   VecCreateMPIWithArray(comm,bs,n,N,harray,&w);
368:   PetscObjectChangeTypeName((PetscObject)w,VECMPIKOKKOS); /* Change it to Kokkos */
369:   VecSetOps_MPIKokkos(w);
370:   w->spptr = new Vec_Kokkos(n,const_cast<PetscScalar*>(harray),const_cast<PetscScalar*>(darray));
371:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
372:   *v = w;
373:   return 0;
374: }

376: /*MC
377:    VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

379:    Options Database Keys:
380: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

382:   Level: beginner

384: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIKokkosWithArray(), VECMPI, VecType, VecCreateMPI()
385: M*/
386: PetscErrorCode VecCreate_Kokkos(Vec v)
387: {
388:   PetscMPIInt    size;

390:   MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
391:   if (size == 1) VecSetType(v,VECSEQKOKKOS);
392:   else VecSetType(v,VECMPIKOKKOS);
393:   return 0;
394: }