Actual source code: ex9.c

  1: static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
  2:   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
  3:   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
  4:   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
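     /* A typical invocation, matching the tests at the bottom of this file and assuming an mpiexec-style
        launcher, is:  mpiexec -n 7 ./ex9 -world2sub  (similarly with -sub2sub or -world2subs). */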

  6: #include <petscvec.h>
  7: int main(int argc,char **argv)
  8: {
  9:   PetscMPIInt    nproc,grank,mycolor;
 10:   PetscInt       i,n,N = 20,low,high;
 11:   MPI_Comm       subcomm;
 12:   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 13:   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 14:   VecScatter     vscat;
 15:   IS             ix,iy;
 16:   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
 17:   PetscBool      optionflag, compareflag;
 18:   char           vectypename[PETSC_MAX_PATH_LEN];
 19:   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
 20:   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
 21:   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */

 23:   PetscInitialize(&argc,&argv,(char*)0,help);
 24:   MPI_Comm_size(PETSC_COMM_WORLD,&nproc);
 25:   MPI_Comm_rank(PETSC_COMM_WORLD,&grank);


 29:   PetscOptionsGetBool(NULL,NULL,"-world2sub",&world2sub,NULL);
 30:   PetscOptionsGetBool(NULL,NULL,"-sub2sub",&sub2sub,NULL);
 31:   PetscOptionsGetBool(NULL,NULL,"-world2subs",&world2subs,NULL);
 32:   PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag);
 33:   if (optionflag) {
 34:     PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);
 35:     if (compareflag) iscuda = PETSC_TRUE;
 36:   }

 38:   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
 39:   mycolor = grank % 3;
 40:   MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm);
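       /* For example, with the 7 MPI ranks used by the tests at the bottom of this file, subcomm0 = {0,3,6},
          subcomm1 = {1,4} and subcomm2 = {2,5} in terms of PETSC_COMM_WORLD ranks */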

 42:   /*===========================================================================
 43:    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
 44:    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
 45:    *===========================================================================*/
 46:   if (world2sub) {
 47:     VecCreate(PETSC_COMM_WORLD, &x);
 48:     VecSetSizes(x, PETSC_DECIDE, N);
 49:     if (iscuda) {
 50:       VecSetType(x, VECCUDA);
 51:     } else {
 52:       VecSetType(x, VECSTANDARD);
 53:     }
 54:     VecSetUp(x);
 55:     PetscObjectSetName((PetscObject)x,"x_commworld"); /* Give a name to view x clearly */

 57:     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
 58:     VecGetOwnershipRange(x,&low,&high);
 59:     for (i=low; i<high; i++) {
 60:       PetscScalar val = -i;
 61:       VecSetValue(x,i,val,INSERT_VALUES);
 62:     }
 63:     VecAssemblyBegin(x);
 64:     VecAssemblyEnd(x);

 66:     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
 67:     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
 68:       Vec         y;
 69:       PetscScalar *yvalue;
 70:       VecCreate(subcomm, &y);
 71:       VecSetSizes(y, PETSC_DECIDE, N);
 72:       if (iscuda) {
 73:         VecSetType(y, VECCUDA);
 74:       } else {
 75:         VecSetType(y, VECSTANDARD);
 76:       }
 77:       VecSetUp(y);
 78:       PetscObjectSetName((PetscObject)y,"y_subcomm_0"); /* Give a name to view y clearly */
 79:       VecGetLocalSize(y,&n);
 80:       if (iscuda) {
 81:         #if defined(PETSC_HAVE_CUDA)
 82:           VecCUDAGetArray(y,&yvalue);
 83:         #endif
 84:       } else {
 85:         VecGetArray(y,&yvalue);
 86:       }
 87:       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
 88:         Note this is a collective call. All processes have to call it and supply consistent N.
 89:       */
 90:       if (iscuda) {
 91:         #if defined(PETSC_HAVE_CUDA)
 92:           VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);
 93:         #endif
 94:       } else {
 95:         VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);
 96:       }
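           /* At this point yg is merely a PETSC_COMM_WORLD-wide view of y's local storage; no entries have been copied yet */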

 98:       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
 99:       VecGetOwnershipRange(yg,&low,&high); /* low, high are global indices */
100:       ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);
101:       ISDuplicate(ix,&iy);

103:       /* Union of ix's on subcomm0 covers the full range of [0,N) */
104:       VecScatterCreate(x,ix,yg,iy,&vscat);
105:       VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
106:       VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
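           /* Note VecScatterBegin/End are collective over PETSC_COMM_WORLD; ranks outside subcomm0 make the matching calls in the else branch below */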

108:       /* Once yg has received the data from x, we return yvalue to y so that we can use y in other operations.
109:         VecGetArray must be paired with VecRestoreArray (and VecCUDAGetArray with VecCUDARestoreArray).
110:       */
111:       if (iscuda) {
112:          #if defined(PETSC_HAVE_CUDA)
113:            VecCUDARestoreArray(y,&yvalue);
114:          #endif
115:       } else {
116:         VecRestoreArray(y,&yvalue);
117:       }

119:       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
120:       VecView(y,PETSC_VIEWER_STDOUT_(subcomm));
121:       VecScale(y,2.0);

123:       /* Send the new y back to x */
124:       VecGetArray(y,&yvalue); /* If VecScale was done on the GPU, PETSc will prepare a valid (host) yvalue for access */
125:       /* Supply new yvalue to yg without memory copying */
126:       VecPlaceArray(yg,yvalue);
127:       VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
128:       VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
129:       VecResetArray(yg);
130:       VecRestoreArray(y,&yvalue); /* yvalue was obtained with VecGetArray above, so restore it with VecRestoreArray (this also works for VECCUDA vectors) */

138:       VecDestroy(&y);
139:     } else {
140:       /* Ranks outside of subcomm0 do not supply values to yg */
141:       if (iscuda) {
142:         #if defined(PETSC_HAVE_CUDA)
143:           VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);
144:         #endif
145:       } else {
146:         VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);
147:       }

149:       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
150:         ranks just need to create empty ISes to cheat VecScatterCreate.
151:       */
152:       ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);
153:       ISDuplicate(ix,&iy);

155:       VecScatterCreate(x,ix,yg,iy,&vscat);
156:       VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
157:       VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);

159:       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
160:         But they have to call VecScatterBegin/End since these routines are collective.
161:       */
162:       VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
163:       VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
164:     }

166:     VecView(x,PETSC_VIEWER_STDOUT_WORLD);
167:     ISDestroy(&ix);
168:     ISDestroy(&iy);
169:     VecDestroy(&x);
170:     VecDestroy(&yg);
171:     VecScatterDestroy(&vscat);
172:   } /* world2sub */

174:   /*===========================================================================
175:    *  Transfer a vector x defined on subcomm0 to a vector y defined on
176:    *  subcomm1. The two subcomms are not overlapping and their union is
177:    *  not necessarily equal to PETSC_COMM_WORLD.
178:    *===========================================================================*/
179:   if (sub2sub) {
180:     if (mycolor == 0) {
181:       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
182:       PetscInt          n,N = 22;
183:       Vec               x,xg,yg;
184:       IS                ix,iy;
185:       VecScatter        vscat;
186:       const PetscScalar *xvalue;
187:       MPI_Comm          intercomm,parentcomm;
188:       PetscMPIInt       lrank;

190:       MPI_Comm_rank(subcomm,&lrank);
191:       /* x is on subcomm */
192:       VecCreate(subcomm, &x);
193:       VecSetSizes(x, PETSC_DECIDE, N);
194:       if (iscuda) {
195:         VecSetType(x, VECCUDA);
196:       } else {
197:         VecSetType(x, VECSTANDARD);
198:       }
199:       VecSetUp(x);
200:       VecGetOwnershipRange(x,&low,&high);

202:       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
203:       for (i=low; i<high; i++) {
204:         PetscScalar val = i;
205:         VecSetValue(x,i,val,INSERT_VALUES);
206:       }
207:       VecAssemblyBegin(x);
208:       VecAssemblyEnd(x);

210:       MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm);

212:       /* Tell rank 0 of subcomm1 the global size of x */
213:       if (!lrank) MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm);

215:       /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
216:         But this order actually does not matter, since what we care about is the vector y, which is defined on subcomm1.
217:       */
218:       MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm);
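           /* parentcomm contains the ranks of subcomm0 and subcomm1, but not those of subcomm2 */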

220:       /* Create a vector xg on parentcomm, which shares memory with x */
221:       VecGetLocalSize(x,&n);
222:       if (iscuda) {
223:         #if defined(PETSC_HAVE_CUDA)
224:           VecCUDAGetArrayRead(x,&xvalue);
225:           VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg);
226:         #endif
227:       } else {
228:         VecGetArrayRead(x,&xvalue);
229:         VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg);
230:       }

232:       /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
233:       if (iscuda) {
234:         #if defined(PETSC_HAVE_CUDA)
235:           VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);
236:         #endif
237:       } else {
238:         VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);
239:       }

241:       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
242:       VecGetOwnershipRange(xg,&low,&high); /* low, high are global indices of xg */
243:       ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);
244:       ISDuplicate(ix,&iy);
245:       VecScatterCreate(xg,ix,yg,iy,&vscat);

247:       /* Scatter values from xg to yg */
248:       VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);
249:       VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);

251:       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
252:       if (iscuda) {
253:         #if defined(PETSC_HAVE_CUDA)
254:           VecCUDARestoreArrayRead(x,&xvalue);
255:         #endif
256:       } else {
257:         VecRestoreArrayRead(x,&xvalue);
258:       }
259:       VecDestroy(&x);
260:       ISDestroy(&ix);
261:       ISDestroy(&iy);
262:       VecDestroy(&xg);
263:       VecDestroy(&yg);
264:       VecScatterDestroy(&vscat);
265:       MPI_Comm_free(&intercomm);
266:       MPI_Comm_free(&parentcomm);
267:     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
268:       PetscInt    n,N;
269:       Vec         y,xg,yg;
270:       IS          ix,iy;
271:       VecScatter  vscat;
272:       PetscScalar *yvalue;
273:       MPI_Comm    intercomm,parentcomm;
274:       PetscMPIInt lrank;

276:       MPI_Comm_rank(subcomm,&lrank);
277:       MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm);

279:       /* The rank-0 processes of the two subcomms talk to each other over the intercomm */
280:       if (!lrank) MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE);
281:       /* Rank 0 of subcomm1 bcasts N to its members */
282:       MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm);
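           /* Every rank in subcomm1 now knows the global size N chosen on subcomm0 */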

284:       /* Create an intracomm PETSc can work on */
285:       MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm);

287:       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
288:       if (iscuda) {
289:         #if defined(PETSC_HAVE_CUDA)
290:           VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);
291:         #endif
292:       } else {
293:         VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);
294:       }

296:       VecCreate(subcomm, &y);
297:       VecSetSizes(y, PETSC_DECIDE, N);
298:       if (iscuda) {
299:         VecSetType(y, VECCUDA);
300:       } else {
301:         VecSetType(y, VECSTANDARD);
302:       }
303:       VecSetUp(y);

305:       PetscObjectSetName((PetscObject)y,"y_subcomm_1"); /* Give a name to view y clearly */
306:       VecGetLocalSize(y,&n);
307:       if (iscuda) {
308:         #if defined(PETSC_HAVE_CUDA)
309:           VecCUDAGetArray(y,&yvalue);
310:         #endif
311:       } else {
312:         VecGetArray(y,&yvalue);
313:       }
314:       /* Create a vector yg on parentcomm, which shares memory with y. Since these creations are collective
315:         over parentcomm, xg and yg must be created in the same order in subcomm0 and subcomm1. For example,
316:         we cannot reverse the order of creating xg and yg in subcomm1.
317:       */
318:       if (iscuda) {
319:         #if defined(PETSC_HAVE_CUDA)
320:           VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);
321:         #endif
322:       } else {
323:         VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);
324:       }

326:       /* Ranks in subcomm0 already specified the full range of the identity map.
327:         Ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
328:       */
329:       ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);
330:       ISDuplicate(ix,&iy);
331:       VecScatterCreate(xg,ix,yg,iy,&vscat);

333:       /* Scatter values from xg to yg */
334:       VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);
335:       VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);

337:       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
338:       if (iscuda) {
339:         #if defined(PETSC_HAVE_CUDA)
340:           VecCUDARestoreArray(y,&yvalue);
341:         #endif
342:       } else {
343:         VecRestoreArray(y,&yvalue);
344:       }

346:       /* Libraries on subcomm1 can safely use y now, for example, view it */
347:       VecView(y,PETSC_VIEWER_STDOUT_(subcomm));
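           /* y should now be [0.0, 1.0, ..., 21.0], i.e., a copy of the x that was built on subcomm0 */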

349:       VecDestroy(&y);
350:       ISDestroy(&ix);
351:       ISDestroy(&iy);
352:       VecDestroy(&xg);
353:       VecDestroy(&yg);
354:       VecScatterDestroy(&vscat);
355:       MPI_Comm_free(&intercomm);
356:       MPI_Comm_free(&parentcomm);
357:     } else if (mycolor == 2) { /* subcomm2 */
358:       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
359:     }
360:   } /* sub2sub */

362:   /*===========================================================================
363:    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
364:    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
365:    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
366:    *  to achieve that.
367:    *===========================================================================*/
368:   if (world2subs) {
369:     Vec         y;
370:     PetscInt    n,N=15,xstart,ystart,low,high;
371:     PetscScalar *yvalue;

373:     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
374:     VecCreate(PETSC_COMM_WORLD, &x);
375:     VecSetSizes(x, PETSC_DECIDE, N);
376:     if (iscuda) {
377:       VecSetType(x, VECCUDA);
378:     } else {
379:       VecSetType(x, VECSTANDARD);
380:     }
381:     VecSetUp(x);
382:     VecGetOwnershipRange(x,&low,&high);
383:     for (i=low; i<high; i++) VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);
384:     VecAssemblyBegin(x);
385:     VecAssemblyEnd(x);

387:     /* Every subcomm has a y as long as x */
388:     VecCreate(subcomm, &y);
389:     VecSetSizes(y, PETSC_DECIDE, N);
390:     if (iscuda) {
391:       VecSetType(y, VECCUDA);
392:     } else {
393:       VecSetType(y, VECSTANDARD);
394:     }
395:     VecSetUp(y);
396:     VecGetLocalSize(y,&n);

398:     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
399:        Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
400:        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
401:        0, 3, 6, etc. from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
402:     */
403:     if (iscuda) {
404:       #if defined(PETSC_HAVE_CUDA)
405:         VecCUDAGetArray(y,&yvalue);
406:         VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);
407:       #endif
408:     } else {
409:       VecGetArray(y,&yvalue);
410:       VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);
411:     }
412:     PetscObjectSetName((PetscObject)yg,"yg_on_subcomms"); /* Give a name to view yg clearly */

414:     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
415:        since the first entry of y on this rank comes from x[xstart]. From ystart, we know where to put entries into yg.
416:      */
417:     VecGetOwnershipRange(y,&xstart,NULL);
418:     VecGetOwnershipRange(yg,&ystart,NULL);
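         /* xstart comes from y's layout over the subcomm while ystart comes from yg's layout over PETSC_COMM_WORLD, so the two generally differ */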

420:     ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
421:     ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
422:     VecScatterCreate(x,ix,yg,iy,&vscat);
423:     VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
424:     VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
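         /* yg now holds one copy of x per subcommunicator (three copies with the 7-rank test layout), interleaved as described above */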

426:     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
427:     VecView(yg,PETSC_VIEWER_STDOUT_WORLD);
428:     VecDestroy(&yg);

430:     /* Restore yvalue so that processes in the subcomm can use y from now on. */
431:     if (iscuda) {
432:       #if defined(PETSC_HAVE_CUDA)
433:         VecCUDARestoreArray(y,&yvalue);
434:       #endif
435:     } else {
436:       VecRestoreArray(y,&yvalue);
437:     }
438:     VecScale(y,3.0);

440:     ISDestroy(&ix); /* One can also destroy ix, iy immediately after VecScatterCreate() */
441:     ISDestroy(&iy);
442:     VecDestroy(&x);
443:     VecDestroy(&y);
444:     VecScatterDestroy(&vscat);
445:   } /* world2subs */

447:   MPI_Comm_free(&subcomm);
448:   PetscFinalize();
449:   return 0;
450: }

452: /*TEST

454:    build:
455:      requires: !defined(PETSC_HAVE_MPIUNI)

457:    testset:
458:      nsize: 7

460:      test:
461:        suffix: 1
462:        args: -world2sub

464:      test:
465:        suffix: 2
466:        args: -sub2sub
467:        # deadlocks with NECMPI and INTELMPI (20210400300)
468:        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)

470:      test:
471:        suffix: 3
472:        args: -world2subs

474:      test:
475:        suffix: 4
476:        args: -world2sub -vectype cuda
477:        requires: cuda

479:      test:
480:        suffix: 5
481:        args: -sub2sub -vectype cuda
482:        requires: cuda

484:      test:
485:        suffix: 6
486:        args: -world2subs -vectype cuda
487:        requires: cuda

489:      test:
490:        suffix: 7
491:        args: -world2sub -sf_type neighbor
492:        output_file: output/ex9_1.out
493:        # OpenMPI has a bug w.r.t. MPI_Neighbor_alltoallv etc. (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
494:        # segfaults with NECMPI
495:        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
496: TEST*/