Actual source code: fdda.c


  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>
  4: #include <petscbt.h>

  6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
  8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
  9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

 11: /*
 12:    For ghost i that may be negative or greater than the upper bound this
 13:   maps it into the 0:m-1 range using periodicity
 14: */
 15: #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

 17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
 18: {
 19:   PetscInt       i,j,nz,*fill;

 21:   if (!dfill) return 0;

 23:   /* count number nonzeros */
 24:   nz = 0;
 25:   for (i=0; i<w; i++) {
 26:     for (j=0; j<w; j++) {
 27:       if (dfill[w*i+j]) nz++;
 28:     }
 29:   }
 30:   PetscMalloc1(nz + w + 1,&fill);
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 33:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 34:   nz = w + 1;
 35:   for (i=0; i<w; i++) {
 36:     fill[i] = nz;
 37:     for (j=0; j<w; j++) {
 38:       if (dfill[w*i+j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   return 0;
 48: }

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
 51: {
 52:   PetscInt       nz;

 54:   if (!dfillsparse) return 0;

 56:   /* Determine number of non-zeros */
 57:   nz = (dfillsparse[w] - w - 1);

 59:   /* Allocate space for our copy of the given sparse matrix representation. */
 60:   PetscMalloc1(nz + w + 1,rfill);
 61:   PetscArraycpy(*rfill,dfillsparse,nz+w+1);
 62:   return 0;
 63: }

 65: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 66: {
 67:   PetscInt       i,k,cnt = 1;


 70:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 71:    columns to the left with any nonzeros in them plus 1 */
 72:   PetscCalloc1(dd->w,&dd->ofillcols);
 73:   for (i=0; i<dd->w; i++) {
 74:     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 75:   }
 76:   for (i=0; i<dd->w; i++) {
 77:     if (dd->ofillcols[i]) {
 78:       dd->ofillcols[i] = cnt++;
 79:     }
 80:   }
 81:   return 0;
 82: }

 84: /*@
 85:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 86:     of the matrix returned by DMCreateMatrix().

 88:     Logically Collective on da

 90:     Input Parameters:
 91: +   da - the distributed array
 92: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
 93: -   ofill - the fill pattern in the off-diagonal blocks

 95:     Level: developer

 97:     Notes:
 98:     This only makes sense when you are doing multicomponent problems but using the
 99:        MPIAIJ matrix format

101:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
102:        representing coupling and 0 entries for missing coupling. For example
103: $             dfill[9] = {1, 0, 0,
104: $                         1, 1, 0,
105: $                         0, 1, 1}
106:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
107:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
108:        diagonal block).

110:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
111:      can be represented in the dfill, ofill format

113:    Contributed by Glenn Hammond

115: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

117: @*/
118: PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
119: {
120:   DM_DA          *dd = (DM_DA*)da->data;

122:   /* save the given dfill and ofill information */
123:   DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
124:   DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);

126:   /* count nonzeros in ofill columns */
127:   DMDASetBlockFills_Private2(dd);

129:   return 0;
130: }

132: /*@
133:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
134:     of the matrix returned by DMCreateMatrix(), using sparse representations
135:     of fill patterns.

137:     Logically Collective on da

139:     Input Parameters:
140: +   da - the distributed array
141: .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
142: -   ofill - the sparse fill pattern in the off-diagonal blocks

144:     Level: developer

146:     Notes: This only makes sense when you are doing multicomponent problems but using the
147:        MPIAIJ matrix format

149:            The format for dfill and ofill is a sparse representation of a
150:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
151:            for missing coupling.  The sparse representation is a 1 dimensional
152:            array of length nz + dof + 1, where nz is the number of non-zeros in
153:            the matrix.  The first dof entries in the array give the
154:            starting array indices of each row's items in the rest of the array,
155:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
156:            and the remaining nz items give the column indices of each of
157:            the 1s within the logical 2D matrix.  Each row's items within
158:            the array are the column indices of the 1s within that row
159:            of the 2D matrix.  PETSc developers may recognize that this is the
160:            same format as that computed by the DMDASetBlockFills_Private()
161:            function from a dense 2D matrix representation.

163:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
164:      can be represented in the dfill, ofill format

166:    Contributed by Philip C. Roth

168: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

170: @*/
171: PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
172: {
173:   DM_DA          *dd = (DM_DA*)da->data;

175:   /* save the given dfill and ofill information */
176:   DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
177:   DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);

179:   /* count nonzeros in ofill columns */
180:   DMDASetBlockFills_Private2(dd);

182:   return 0;
183: }

185: PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
186: {
187:   PetscInt         dim,m,n,p,nc;
188:   DMBoundaryType   bx,by,bz;
189:   MPI_Comm         comm;
190:   PetscMPIInt      size;
191:   PetscBool        isBAIJ;
192:   DM_DA            *dd = (DM_DA*)da->data;

194:   /*
195:                                   m
196:           ------------------------------------------------------
197:          |                                                     |
198:          |                                                     |
199:          |               ----------------------                |
200:          |               |                    |                |
201:       n  |           yn  |                    |                |
202:          |               |                    |                |
203:          |               .---------------------                |
204:          |             (xs,ys)     xn                          |
205:          |            .                                        |
206:          |         (gxs,gys)                                   |
207:          |                                                     |
208:           -----------------------------------------------------
209:   */

211:   /*
212:          nc - number of components per grid point
213:          col - number of colors needed in one direction for single component problem

215:   */
216:   DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);

218:   PetscObjectGetComm((PetscObject)da,&comm);
219:   MPI_Comm_size(comm,&size);
220:   if (ctype == IS_COLORING_LOCAL) {
221:     if (size == 1) {
222:       ctype = IS_COLORING_GLOBAL;
223:     } else if (dim > 1) {
224:       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
225:         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
226:       }
227:     }
228:   }

230:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
231:      matrices is for the blocks, not the individual matrix elements  */
232:   PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
233:   if (!isBAIJ) PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);
234:   if (!isBAIJ) PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);
235:   if (isBAIJ) {
236:     dd->w  = 1;
237:     dd->xs = dd->xs/nc;
238:     dd->xe = dd->xe/nc;
239:     dd->Xs = dd->Xs/nc;
240:     dd->Xe = dd->Xe/nc;
241:   }

243:   /*
244:      We do not provide a getcoloring function in the DMDA operations because
245:    the basic DMDA does not know about matrices. We think of DMDA as being
246:    more low-level then matrices.
247:   */
248:   if (dim == 1) {
249:     DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
250:   } else if (dim == 2) {
251:     DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
252:   } else if (dim == 3) {
253:     DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
254:   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
255:   if (isBAIJ) {
256:     dd->w  = nc;
257:     dd->xs = dd->xs*nc;
258:     dd->xe = dd->xe*nc;
259:     dd->Xs = dd->Xs*nc;
260:     dd->Xe = dd->Xe*nc;
261:   }
262:   return 0;
263: }

265: /* ---------------------------------------------------------------------------------*/

267: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
268: {
269:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
270:   PetscInt        ncolors;
271:   MPI_Comm        comm;
272:   DMBoundaryType  bx,by;
273:   DMDAStencilType st;
274:   ISColoringValue *colors;
275:   DM_DA           *dd = (DM_DA*)da->data;

277:   /*
278:          nc - number of components per grid point
279:          col - number of colors needed in one direction for single component problem

281:   */
282:   DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
283:   col  = 2*s + 1;
284:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
285:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
286:   PetscObjectGetComm((PetscObject)da,&comm);

288:   /* special case as taught to us by Paul Hovland */
289:   if (st == DMDA_STENCIL_STAR && s == 1) {
290:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
291:   } else {
292:     if (ctype == IS_COLORING_GLOBAL) {
293:       if (!dd->localcoloring) {
294:         PetscMalloc1(nc*nx*ny,&colors);
295:         ii   = 0;
296:         for (j=ys; j<ys+ny; j++) {
297:           for (i=xs; i<xs+nx; i++) {
298:             for (k=0; k<nc; k++) {
299:               colors[ii++] = k + nc*((i % col) + col*(j % col));
300:             }
301:           }
302:         }
303:         ncolors = nc + nc*(col-1 + col*(col-1));
304:         ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
305:       }
306:       *coloring = dd->localcoloring;
307:     } else if (ctype == IS_COLORING_LOCAL) {
308:       if (!dd->ghostedcoloring) {
309:         PetscMalloc1(nc*gnx*gny,&colors);
310:         ii   = 0;
311:         for (j=gys; j<gys+gny; j++) {
312:           for (i=gxs; i<gxs+gnx; i++) {
313:             for (k=0; k<nc; k++) {
314:               /* the complicated stuff is to handle periodic boundaries */
315:               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
316:             }
317:           }
318:         }
319:         ncolors = nc + nc*(col - 1 + col*(col-1));
320:         ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
321:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

323:         ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
324:       }
325:       *coloring = dd->ghostedcoloring;
326:     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
327:   }
328:   ISColoringReference(*coloring);
329:   return 0;
330: }

332: /* ---------------------------------------------------------------------------------*/

334: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
335: {
336:   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
337:   PetscInt        ncolors;
338:   MPI_Comm        comm;
339:   DMBoundaryType  bx,by,bz;
340:   DMDAStencilType st;
341:   ISColoringValue *colors;
342:   DM_DA           *dd = (DM_DA*)da->data;

344:   /*
345:          nc - number of components per grid point
346:          col - number of colors needed in one direction for single component problem

348:   */
349:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
350:   col  = 2*s + 1;
351:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
352:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
353:   PetscObjectGetComm((PetscObject)da,&comm);

355:   /* create the coloring */
356:   if (ctype == IS_COLORING_GLOBAL) {
357:     if (!dd->localcoloring) {
358:       PetscMalloc1(nc*nx*ny*nz,&colors);
359:       ii   = 0;
360:       for (k=zs; k<zs+nz; k++) {
361:         for (j=ys; j<ys+ny; j++) {
362:           for (i=xs; i<xs+nx; i++) {
363:             for (l=0; l<nc; l++) {
364:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
365:             }
366:           }
367:         }
368:       }
369:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
370:       ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
371:     }
372:     *coloring = dd->localcoloring;
373:   } else if (ctype == IS_COLORING_LOCAL) {
374:     if (!dd->ghostedcoloring) {
375:       PetscMalloc1(nc*gnx*gny*gnz,&colors);
376:       ii   = 0;
377:       for (k=gzs; k<gzs+gnz; k++) {
378:         for (j=gys; j<gys+gny; j++) {
379:           for (i=gxs; i<gxs+gnx; i++) {
380:             for (l=0; l<nc; l++) {
381:               /* the complicated stuff is to handle periodic boundaries */
382:               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
383:             }
384:           }
385:         }
386:       }
387:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
388:       ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
389:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
390:     }
391:     *coloring = dd->ghostedcoloring;
392:   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
393:   ISColoringReference(*coloring);
394:   return 0;
395: }

397: /* ---------------------------------------------------------------------------------*/

399: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
400: {
401:   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
402:   PetscInt        ncolors;
403:   MPI_Comm        comm;
404:   DMBoundaryType  bx;
405:   ISColoringValue *colors;
406:   DM_DA           *dd = (DM_DA*)da->data;

408:   /*
409:          nc - number of components per grid point
410:          col - number of colors needed in one direction for single component problem

412:   */
413:   DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
414:   col  = 2*s + 1;
415:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
416:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
417:   PetscObjectGetComm((PetscObject)da,&comm);

419:   /* create the coloring */
420:   if (ctype == IS_COLORING_GLOBAL) {
421:     if (!dd->localcoloring) {
422:       PetscMalloc1(nc*nx,&colors);
423:       if (dd->ofillcols) {
424:         PetscInt tc = 0;
425:         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
426:         i1 = 0;
427:         for (i=xs; i<xs+nx; i++) {
428:           for (l=0; l<nc; l++) {
429:             if (dd->ofillcols[l] && (i % col)) {
430:               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
431:             } else {
432:               colors[i1++] = l;
433:             }
434:           }
435:         }
436:         ncolors = nc + 2*s*tc;
437:       } else {
438:         i1 = 0;
439:         for (i=xs; i<xs+nx; i++) {
440:           for (l=0; l<nc; l++) {
441:             colors[i1++] = l + nc*(i % col);
442:           }
443:         }
444:         ncolors = nc + nc*(col-1);
445:       }
446:       ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
447:     }
448:     *coloring = dd->localcoloring;
449:   } else if (ctype == IS_COLORING_LOCAL) {
450:     if (!dd->ghostedcoloring) {
451:       PetscMalloc1(nc*gnx,&colors);
452:       i1   = 0;
453:       for (i=gxs; i<gxs+gnx; i++) {
454:         for (l=0; l<nc; l++) {
455:           /* the complicated stuff is to handle periodic boundaries */
456:           colors[i1++] = l + nc*(SetInRange(i,m) % col);
457:         }
458:       }
459:       ncolors = nc + nc*(col-1);
460:       ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
461:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
462:     }
463:     *coloring = dd->ghostedcoloring;
464:   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
465:   ISColoringReference(*coloring);
466:   return 0;
467: }

469: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
470: {
471:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
472:   PetscInt        ncolors;
473:   MPI_Comm        comm;
474:   DMBoundaryType  bx,by;
475:   ISColoringValue *colors;
476:   DM_DA           *dd = (DM_DA*)da->data;

478:   /*
479:          nc - number of components per grid point
480:          col - number of colors needed in one direction for single component problem

482:   */
483:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);
484:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
485:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
486:   PetscObjectGetComm((PetscObject)da,&comm);
487:   /* create the coloring */
488:   if (ctype == IS_COLORING_GLOBAL) {
489:     if (!dd->localcoloring) {
490:       PetscMalloc1(nc*nx*ny,&colors);
491:       ii   = 0;
492:       for (j=ys; j<ys+ny; j++) {
493:         for (i=xs; i<xs+nx; i++) {
494:           for (k=0; k<nc; k++) {
495:             colors[ii++] = k + nc*((3*j+i) % 5);
496:           }
497:         }
498:       }
499:       ncolors = 5*nc;
500:       ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
501:     }
502:     *coloring = dd->localcoloring;
503:   } else if (ctype == IS_COLORING_LOCAL) {
504:     if (!dd->ghostedcoloring) {
505:       PetscMalloc1(nc*gnx*gny,&colors);
506:       ii = 0;
507:       for (j=gys; j<gys+gny; j++) {
508:         for (i=gxs; i<gxs+gnx; i++) {
509:           for (k=0; k<nc; k++) {
510:             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
511:           }
512:         }
513:       }
514:       ncolors = 5*nc;
515:       ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
516:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
517:     }
518:     *coloring = dd->ghostedcoloring;
519:   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
520:   return 0;
521: }

523: /* =========================================================================== */
524: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
528: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
529: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
530: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
531: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
532: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
533: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
534: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
535: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
536: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
537: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

539: /*@C
540:    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

542:    Logically Collective on mat

544:    Input Parameters:
545: +  mat - the matrix
546: -  da - the da

548:    Level: intermediate

550: @*/
551: PetscErrorCode MatSetupDM(Mat mat,DM da)
552: {
555:   PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
556:   return 0;
557: }

559: PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
560: {
561:   DM                da;
562:   const char        *prefix;
563:   Mat               Anatural;
564:   AO                ao;
565:   PetscInt          rstart,rend,*petsc,i;
566:   IS                is;
567:   MPI_Comm          comm;
568:   PetscViewerFormat format;

570:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
571:   PetscViewerGetFormat(viewer,&format);
572:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;

574:   PetscObjectGetComm((PetscObject)A,&comm);
575:   MatGetDM(A, &da);

578:   DMDAGetAO(da,&ao);
579:   MatGetOwnershipRange(A,&rstart,&rend);
580:   PetscMalloc1(rend-rstart,&petsc);
581:   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
582:   AOApplicationToPetsc(ao,rend-rstart,petsc);
583:   ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);

585:   /* call viewer on natural ordering */
586:   MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
587:   ISDestroy(&is);
588:   PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
589:   PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
590:   PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
591:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
592:   MatView(Anatural,viewer);
593:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
594:   MatDestroy(&Anatural);
595:   return 0;
596: }

598: PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
599: {
600:   DM             da;
601:   Mat            Anatural,Aapp;
602:   AO             ao;
603:   PetscInt       rstart,rend,*app,i,m,n,M,N;
604:   IS             is;
605:   MPI_Comm       comm;

607:   PetscObjectGetComm((PetscObject)A,&comm);
608:   MatGetDM(A, &da);

611:   /* Load the matrix in natural ordering */
612:   MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
613:   MatSetType(Anatural,((PetscObject)A)->type_name);
614:   MatGetSize(A,&M,&N);
615:   MatGetLocalSize(A,&m,&n);
616:   MatSetSizes(Anatural,m,n,M,N);
617:   MatLoad(Anatural,viewer);

619:   /* Map natural ordering to application ordering and create IS */
620:   DMDAGetAO(da,&ao);
621:   MatGetOwnershipRange(Anatural,&rstart,&rend);
622:   PetscMalloc1(rend-rstart,&app);
623:   for (i=rstart; i<rend; i++) app[i-rstart] = i;
624:   AOPetscToApplication(ao,rend-rstart,app);
625:   ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);

627:   /* Do permutation and replace header */
628:   MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
629:   MatHeaderReplace(A,&Aapp);
630:   ISDestroy(&is);
631:   MatDestroy(&Anatural);
632:   return 0;
633: }

635: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
636: {
637:   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
638:   Mat            A;
639:   MPI_Comm       comm;
640:   MatType        Atype;
641:   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
642:   MatType        mtype;
643:   PetscMPIInt    size;
644:   DM_DA          *dd = (DM_DA*)da->data;

646:   MatInitializePackage();
647:   mtype = da->mattype;

649:   /*
650:                                   m
651:           ------------------------------------------------------
652:          |                                                     |
653:          |                                                     |
654:          |               ----------------------                |
655:          |               |                    |                |
656:       n  |           ny  |                    |                |
657:          |               |                    |                |
658:          |               .---------------------                |
659:          |             (xs,ys)     nx                          |
660:          |            .                                        |
661:          |         (gxs,gys)                                   |
662:          |                                                     |
663:           -----------------------------------------------------
664:   */

666:   /*
667:          nc - number of components per grid point
668:          col - number of colors needed in one direction for single component problem

670:   */
671:   M   = dd->M;
672:   N   = dd->N;
673:   P   = dd->P;
674:   dim = da->dim;
675:   dof = dd->w;
676:   /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
677:   DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);
678:   PetscObjectGetComm((PetscObject)da,&comm);
679:   MatCreate(comm,&A);
680:   MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
681:   MatSetType(A,mtype);
682:   MatSetFromOptions(A);
683:   if (dof*nx*ny*nz < da->bind_below) {
684:     MatSetBindingPropagates(A,PETSC_TRUE);
685:     MatBindToCPU(A,PETSC_TRUE);
686:   }
687:   MatSetDM(A,da);
688:   if (da->structure_only) {
689:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
690:   }
691:   MatGetType(A,&Atype);
692:   /*
693:      We do not provide a getmatrix function in the DMDA operations because
694:    the basic DMDA does not know about matrices. We think of DMDA as being more
695:    more low-level than matrices. This is kind of cheating but, cause sometimes
696:    we think of DMDA has higher level than matrices.

698:      We could switch based on Atype (or mtype), but we do not since the
699:    specialized setting routines depend only on the particular preallocation
700:    details of the matrix, not the type itself.
701:   */
702:   PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
703:   if (!aij) {
704:     PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
705:   }
706:   if (!aij) {
707:     PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
708:     if (!baij) {
709:       PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
710:     }
711:     if (!baij) {
712:       PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
713:       if (!sbaij) {
714:         PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
715:       }
716:       if (!sbaij) {
717:         PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
718:         if (!sell) {
719:           PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
720:         }
721:       }
722:       if (!sell) {
723:         PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
724:       }
725:     }
726:   }
727:   if (aij) {
728:     if (dim == 1) {
729:       if (dd->ofill) {
730:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
731:       } else {
732:         DMBoundaryType bx;
733:         PetscMPIInt  size;
734:         DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);
735:         MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
736:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
737:           DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);
738:         } else {
739:           DMCreateMatrix_DA_1d_MPIAIJ(da,A);
740:         }
741:       }
742:     } else if (dim == 2) {
743:       if (dd->ofill) {
744:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
745:       } else {
746:         DMCreateMatrix_DA_2d_MPIAIJ(da,A);
747:       }
748:     } else if (dim == 3) {
749:       if (dd->ofill) {
750:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
751:       } else {
752:         DMCreateMatrix_DA_3d_MPIAIJ(da,A);
753:       }
754:     }
755:   } else if (baij) {
756:     if (dim == 2) {
757:       DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
758:     } else if (dim == 3) {
759:       DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
760:     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
761:   } else if (sbaij) {
762:     if (dim == 2) {
763:       DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
764:     } else if (dim == 3) {
765:       DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
766:     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
767:   } else if (sell) {
768:      if (dim == 2) {
769:        DMCreateMatrix_DA_2d_MPISELL(da,A);
770:      } else if (dim == 3) {
771:        DMCreateMatrix_DA_3d_MPISELL(da,A);
772:      } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
773:   } else if (is) {
774:     DMCreateMatrix_DA_IS(da,A);
775:   } else {
776:     ISLocalToGlobalMapping ltog;

778:     MatSetBlockSize(A,dof);
779:     MatSetUp(A);
780:     DMGetLocalToGlobalMapping(da,&ltog);
781:     MatSetLocalToGlobalMapping(A,ltog,ltog);
782:   }
783:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
784:   MatSetStencil(A,dim,dims,starts,dof);
785:   MatSetDM(A,da);
786:   MPI_Comm_size(comm,&size);
787:   if (size > 1) {
788:     /* change viewer to display matrix in natural ordering */
789:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
790:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
791:   }
792:   *J = A;
793:   return 0;
794: }

796: /* ---------------------------------------------------------------------------------*/
797: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

799: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
800: {
801:   DM_DA                  *da = (DM_DA*)dm->data;
802:   Mat                    lJ,P;
803:   ISLocalToGlobalMapping ltog;
804:   IS                     is;
805:   PetscBT                bt;
806:   const PetscInt         *e_loc,*idx;
807:   PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

809:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
810:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
811:   dof  = da->w;
812:   MatSetBlockSize(J,dof);
813:   DMGetLocalToGlobalMapping(dm,&ltog);

815:   /* flag local elements indices in local DMDA numbering */
816:   ISLocalToGlobalMappingGetSize(ltog,&nv);
817:   PetscBTCreate(nv/dof,&bt);
818:   DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
819:   for (i=0;i<nel*nen;i++) PetscBTSet(bt,e_loc[i]);

821:   /* filter out (set to -1) the global indices not used by the local elements */
822:   PetscMalloc1(nv/dof,&gidx);
823:   ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);
824:   PetscArraycpy(gidx,idx,nv/dof);
825:   ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);
826:   for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
827:   PetscBTDestroy(&bt);
828:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);
829:   ISLocalToGlobalMappingCreateIS(is,&ltog);
830:   MatSetLocalToGlobalMapping(J,ltog,ltog);
831:   ISLocalToGlobalMappingDestroy(&ltog);
832:   ISDestroy(&is);

834:   /* Preallocation */
835:   if (dm->prealloc_skip) {
836:     MatSetUp(J);
837:   } else {
838:     MatISGetLocalMat(J,&lJ);
839:     MatGetLocalToGlobalMapping(lJ,&ltog,NULL);
840:     MatCreate(PetscObjectComm((PetscObject)lJ),&P);
841:     MatSetType(P,MATPREALLOCATOR);
842:     MatSetLocalToGlobalMapping(P,ltog,ltog);
843:     MatGetSize(lJ,&N,NULL);
844:     MatGetLocalSize(lJ,&n,NULL);
845:     MatSetSizes(P,n,n,N,N);
846:     MatSetBlockSize(P,dof);
847:     MatSetUp(P);
848:     for (i=0;i<nel;i++) {
849:       MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);
850:     }
851:     MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);
852:     MatISRestoreLocalMat(J,&lJ);
853:     DMDARestoreElements(dm,&nel,&nen,&e_loc);
854:     MatDestroy(&P);

856:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
857:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
858:   }
859:   return 0;
860: }

862: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
863: {
864:   PetscErrorCode         ierr;
865:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
866:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
867:   MPI_Comm               comm;
868:   PetscScalar            *values;
869:   DMBoundaryType         bx,by;
870:   ISLocalToGlobalMapping ltog;
871:   DMDAStencilType        st;

873:   /*
874:          nc - number of components per grid point
875:          col - number of colors needed in one direction for single component problem

877:   */
878:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
879:   col  = 2*s + 1;
880:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
881:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
882:   PetscObjectGetComm((PetscObject)da,&comm);

884:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
885:   DMGetLocalToGlobalMapping(da,&ltog);

887:   MatSetBlockSize(J,nc);
888:   /* determine the matrix preallocation information */
889:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
890:   for (i=xs; i<xs+nx; i++) {

892:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
893:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

895:     for (j=ys; j<ys+ny; j++) {
896:       slot = i - gxs + gnx*(j - gys);

898:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
899:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

901:       cnt = 0;
902:       for (k=0; k<nc; k++) {
903:         for (l=lstart; l<lend+1; l++) {
904:           for (p=pstart; p<pend+1; p++) {
905:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
906:               cols[cnt++] = k + nc*(slot + gnx*l + p);
907:             }
908:           }
909:         }
910:         rows[k] = k + nc*(slot);
911:       }
912:       MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
913:     }
914:   }
915:   MatSetBlockSize(J,nc);
916:   MatSeqSELLSetPreallocation(J,0,dnz);
917:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
918:   MatPreallocateFinalize(dnz,onz);

920:   MatSetLocalToGlobalMapping(J,ltog,ltog);

922:   /*
923:     For each node in the grid: we get the neighbors in the local (on processor ordering
924:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
925:     PETSc ordering.
926:   */
927:   if (!da->prealloc_only) {
928:     PetscCalloc1(col*col*nc*nc,&values);
929:     for (i=xs; i<xs+nx; i++) {

931:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
932:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

934:       for (j=ys; j<ys+ny; j++) {
935:         slot = i - gxs + gnx*(j - gys);

937:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
938:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

940:         cnt = 0;
941:         for (k=0; k<nc; k++) {
942:           for (l=lstart; l<lend+1; l++) {
943:             for (p=pstart; p<pend+1; p++) {
944:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
945:                 cols[cnt++] = k + nc*(slot + gnx*l + p);
946:               }
947:             }
948:           }
949:           rows[k] = k + nc*(slot);
950:         }
951:         MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
952:       }
953:     }
954:     PetscFree(values);
955:     /* do not copy values to GPU since they are all zero and not yet needed there */
956:     MatBindToCPU(J,PETSC_TRUE);
957:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
958:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
959:     MatBindToCPU(J,PETSC_FALSE);
960:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
961:   }
962:   PetscFree2(rows,cols);
963:   return 0;
964: }

966: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
967: {
968:   PetscErrorCode         ierr;
969:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
970:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
971:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
972:   MPI_Comm               comm;
973:   PetscScalar            *values;
974:   DMBoundaryType         bx,by,bz;
975:   ISLocalToGlobalMapping ltog;
976:   DMDAStencilType        st;

978:   /*
979:          nc - number of components per grid point
980:          col - number of colors needed in one direction for single component problem

982:   */
983:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
984:   col  = 2*s + 1;
985:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
986:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
987:   PetscObjectGetComm((PetscObject)da,&comm);

989:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
990:   DMGetLocalToGlobalMapping(da,&ltog);

992:   MatSetBlockSize(J,nc);
993:   /* determine the matrix preallocation information */
994:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
995:   for (i=xs; i<xs+nx; i++) {
996:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
998:     for (j=ys; j<ys+ny; j++) {
999:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1000:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1001:       for (k=zs; k<zs+nz; k++) {
1002:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1003:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1005:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1007:         cnt = 0;
1008:         for (l=0; l<nc; l++) {
1009:           for (ii=istart; ii<iend+1; ii++) {
1010:             for (jj=jstart; jj<jend+1; jj++) {
1011:               for (kk=kstart; kk<kend+1; kk++) {
1012:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1013:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1014:                 }
1015:               }
1016:             }
1017:           }
1018:           rows[l] = l + nc*(slot);
1019:         }
1020:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1021:       }
1022:     }
1023:   }
1024:   MatSetBlockSize(J,nc);
1025:   MatSeqSELLSetPreallocation(J,0,dnz);
1026:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1027:   MatPreallocateFinalize(dnz,onz);
1028:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1030:   /*
1031:     For each node in the grid: we get the neighbors in the local (on processor ordering
1032:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1033:     PETSc ordering.
1034:   */
1035:   if (!da->prealloc_only) {
1036:     PetscCalloc1(col*col*col*nc*nc*nc,&values);
1037:     for (i=xs; i<xs+nx; i++) {
1038:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1039:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1040:       for (j=ys; j<ys+ny; j++) {
1041:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1042:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1043:         for (k=zs; k<zs+nz; k++) {
1044:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1045:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1047:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1049:           cnt = 0;
1050:           for (l=0; l<nc; l++) {
1051:             for (ii=istart; ii<iend+1; ii++) {
1052:               for (jj=jstart; jj<jend+1; jj++) {
1053:                 for (kk=kstart; kk<kend+1; kk++) {
1054:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1055:                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1056:                   }
1057:                 }
1058:               }
1059:             }
1060:             rows[l] = l + nc*(slot);
1061:           }
1062:           MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1063:         }
1064:       }
1065:     }
1066:     PetscFree(values);
1067:     /* do not copy values to GPU since they are all zero and not yet needed there */
1068:     MatBindToCPU(J,PETSC_TRUE);
1069:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1070:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1071:     MatBindToCPU(J,PETSC_FALSE);
1072:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1073:   }
1074:   PetscFree2(rows,cols);
1075:   return 0;
1076: }

1078: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1079: {
1080:   PetscErrorCode         ierr;
1081:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1082:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1083:   MPI_Comm               comm;
1084:   DMBoundaryType         bx,by;
1085:   ISLocalToGlobalMapping ltog,mltog;
1086:   DMDAStencilType        st;
1087:   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

1089:   /*
1090:          nc - number of components per grid point
1091:          col - number of colors needed in one direction for single component problem

1093:   */
1094:   DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
1095:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1096:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1097:   }
1098:   col  = 2*s + 1;
1099:   /*
1100:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1101:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1102:   */
1103:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1104:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1105:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1106:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1107:   PetscObjectGetComm((PetscObject)da,&comm);

1109:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1110:   DMGetLocalToGlobalMapping(da,&ltog);

1112:   MatSetBlockSize(J,nc);
1113:   /* determine the matrix preallocation information */
1114:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1115:   for (i=xs; i<xs+nx; i++) {
1116:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1117:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1119:     for (j=ys; j<ys+ny; j++) {
1120:       slot = i - gxs + gnx*(j - gys);

1122:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1123:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1125:       cnt = 0;
1126:       for (k=0; k<nc; k++) {
1127:         for (l=lstart; l<lend+1; l++) {
1128:           for (p=pstart; p<pend+1; p++) {
1129:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1130:               cols[cnt++] = k + nc*(slot + gnx*l + p);
1131:             }
1132:           }
1133:         }
1134:         rows[k] = k + nc*(slot);
1135:       }
1136:       if (removedups) {
1137:         MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1138:       } else {
1139:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1140:       }
1141:     }
1142:   }
1143:   MatSetBlockSize(J,nc);
1144:   MatSeqAIJSetPreallocation(J,0,dnz);
1145:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1146:   MatPreallocateFinalize(dnz,onz);
1147:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1148:   if (!mltog) {
1149:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1150:   }

1152:   /*
1153:     For each node in the grid: we get the neighbors in the local (on processor ordering
1154:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1155:     PETSc ordering.
1156:   */
1157:   if (!da->prealloc_only) {
1158:     for (i=xs; i<xs+nx; i++) {

1160:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1161:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1163:       for (j=ys; j<ys+ny; j++) {
1164:         slot = i - gxs + gnx*(j - gys);

1166:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1167:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1169:         cnt = 0;
1170:         for (l=lstart; l<lend+1; l++) {
1171:           for (p=pstart; p<pend+1; p++) {
1172:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1173:               cols[cnt++] = nc*(slot + gnx*l + p);
1174:               for (k=1; k<nc; k++) {
1175:                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1176:               }
1177:             }
1178:           }
1179:         }
1180:         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1181:         MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1182:       }
1183:     }
1184:     /* do not copy values to GPU since they are all zero and not yet needed there */
1185:     MatBoundToCPU(J,&alreadyboundtocpu);
1186:     MatBindToCPU(J,PETSC_TRUE);
1187:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1188:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1189:     if (!alreadyboundtocpu) MatBindToCPU(J,PETSC_FALSE);
1190:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1191:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1192:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1193:     }
1194:   }
1195:   PetscFree2(rows,cols);
1196:   return 0;
1197: }

1199: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1200: {
1201:   PetscErrorCode         ierr;
1202:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1203:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1204:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1205:   DM_DA                  *dd = (DM_DA*)da->data;
1206:   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1207:   MPI_Comm               comm;
1208:   DMBoundaryType         bx,by;
1209:   ISLocalToGlobalMapping ltog;
1210:   DMDAStencilType        st;
1211:   PetscBool              removedups = PETSC_FALSE;

1213:   /*
1214:          nc - number of components per grid point
1215:          col - number of colors needed in one direction for single component problem

1217:   */
1218:   DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
1219:   col  = 2*s + 1;
1220:   /*
1221:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1222:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1223:   */
1224:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1225:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1226:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1227:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1228:   PetscObjectGetComm((PetscObject)da,&comm);

1230:   PetscMalloc1(col*col*nc,&cols);
1231:   DMGetLocalToGlobalMapping(da,&ltog);

1233:   MatSetBlockSize(J,nc);
1234:   /* determine the matrix preallocation information */
1235:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1236:   for (i=xs; i<xs+nx; i++) {

1238:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1239:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1241:     for (j=ys; j<ys+ny; j++) {
1242:       slot = i - gxs + gnx*(j - gys);

1244:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1245:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1247:       for (k=0; k<nc; k++) {
1248:         cnt = 0;
1249:         for (l=lstart; l<lend+1; l++) {
1250:           for (p=pstart; p<pend+1; p++) {
1251:             if (l || p) {
1252:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1253:                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1254:               }
1255:             } else {
1256:               if (dfill) {
1257:                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1258:               } else {
1259:                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1260:               }
1261:             }
1262:           }
1263:         }
1264:         row    = k + nc*(slot);
1265:         maxcnt = PetscMax(maxcnt,cnt);
1266:         if (removedups) {
1267:           MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1268:         } else {
1269:           MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1270:         }
1271:       }
1272:     }
1273:   }
1274:   MatSeqAIJSetPreallocation(J,0,dnz);
1275:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1276:   MatPreallocateFinalize(dnz,onz);
1277:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1279:   /*
1280:     For each node in the grid: we get the neighbors in the local (on processor ordering
1281:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1282:     PETSc ordering.
1283:   */
1284:   if (!da->prealloc_only) {
1285:     for (i=xs; i<xs+nx; i++) {

1287:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1288:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1290:       for (j=ys; j<ys+ny; j++) {
1291:         slot = i - gxs + gnx*(j - gys);

1293:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1294:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1296:         for (k=0; k<nc; k++) {
1297:           cnt = 0;
1298:           for (l=lstart; l<lend+1; l++) {
1299:             for (p=pstart; p<pend+1; p++) {
1300:               if (l || p) {
1301:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1302:                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1303:                 }
1304:               } else {
1305:                 if (dfill) {
1306:                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1307:                 } else {
1308:                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1309:                 }
1310:               }
1311:             }
1312:           }
1313:           row  = k + nc*(slot);
1314:           MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1315:         }
1316:       }
1317:     }
1318:     /* do not copy values to GPU since they are all zero and not yet needed there */
1319:     MatBindToCPU(J,PETSC_TRUE);
1320:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1321:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1322:     MatBindToCPU(J,PETSC_FALSE);
1323:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1324:   }
1325:   PetscFree(cols);
1326:   return 0;
1327: }

1329: /* ---------------------------------------------------------------------------------*/

1331: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1332: {
1333:   PetscErrorCode         ierr;
1334:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1335:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1336:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1337:   MPI_Comm               comm;
1338:   DMBoundaryType         bx,by,bz;
1339:   ISLocalToGlobalMapping ltog,mltog;
1340:   DMDAStencilType        st;
1341:   PetscBool              removedups = PETSC_FALSE;

1343:   /*
1344:          nc - number of components per grid point
1345:          col - number of colors needed in one direction for single component problem

1347:   */
1348:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1349:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1350:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1351:   }
1352:   col  = 2*s + 1;

1354:   /*
1355:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1356:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1357:   */
1358:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1359:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1360:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

1362:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1363:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1364:   PetscObjectGetComm((PetscObject)da,&comm);

1366:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1367:   DMGetLocalToGlobalMapping(da,&ltog);

1369:   MatSetBlockSize(J,nc);
1370:   /* determine the matrix preallocation information */
1371:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1372:   for (i=xs; i<xs+nx; i++) {
1373:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1374:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1375:     for (j=ys; j<ys+ny; j++) {
1376:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1377:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1378:       for (k=zs; k<zs+nz; k++) {
1379:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1380:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1382:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1384:         cnt = 0;
1385:         for (l=0; l<nc; l++) {
1386:           for (ii=istart; ii<iend+1; ii++) {
1387:             for (jj=jstart; jj<jend+1; jj++) {
1388:               for (kk=kstart; kk<kend+1; kk++) {
1389:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1390:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1391:                 }
1392:               }
1393:             }
1394:           }
1395:           rows[l] = l + nc*(slot);
1396:         }
1397:         if (removedups) {
1398:           MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1399:         } else {
1400:           MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1401:         }
1402:       }
1403:     }
1404:   }
1405:   MatSetBlockSize(J,nc);
1406:   MatSeqAIJSetPreallocation(J,0,dnz);
1407:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1408:   MatPreallocateFinalize(dnz,onz);
1409:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1410:   if (!mltog) {
1411:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1412:   }

1414:   /*
1415:     For each node in the grid: we get the neighbors in the local (on processor ordering
1416:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1417:     PETSc ordering.
1418:   */
1419:   if (!da->prealloc_only) {
1420:     for (i=xs; i<xs+nx; i++) {
1421:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1422:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1423:       for (j=ys; j<ys+ny; j++) {
1424:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1425:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1426:         for (k=zs; k<zs+nz; k++) {
1427:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1428:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1430:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1432:           cnt = 0;
1433:           for (kk=kstart; kk<kend+1; kk++) {
1434:             for (jj=jstart; jj<jend+1; jj++) {
1435:               for (ii=istart; ii<iend+1; ii++) {
1436:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1437:                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1438:                     for (l=1; l<nc; l++) {
1439:                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1440:                   }
1441:                 }
1442:               }
1443:             }
1444:           }
1445:           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1446:           MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1447:         }
1448:       }
1449:     }
1450:     /* do not copy values to GPU since they are all zero and not yet needed there */
1451:     MatBindToCPU(J,PETSC_TRUE);
1452:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1453:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1454:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1455:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1456:     }
1457:     MatBindToCPU(J,PETSC_FALSE);
1458:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1459:   }
1460:   PetscFree2(rows,cols);
1461:   return 0;
1462: }

1464: /* ---------------------------------------------------------------------------------*/

1466: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1467: {
1468:   DM_DA                  *dd = (DM_DA*)da->data;
1469:   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1470:   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1471:   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1472:   DMBoundaryType         bx;
1473:   ISLocalToGlobalMapping ltog;
1474:   PetscMPIInt            rank,size;

1476:   MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1477:   MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);

1479:   /*
1480:          nc - number of components per grid point

1482:   */
1483:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1485:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1486:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1488:   MatSetBlockSize(J,nc);
1489:   PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);

1491:   /*
1492:         note should be smaller for first and last process with no periodic
1493:         does not handle dfill
1494:   */
1495:   cnt = 0;
1496:   /* coupling with process to the left */
1497:   for (i=0; i<s; i++) {
1498:     for (j=0; j<nc; j++) {
1499:       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1500:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1501:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1502:         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1503:         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1504:       }
1505:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1506:       cnt++;
1507:     }
1508:   }
1509:   for (i=s; i<nx-s; i++) {
1510:     for (j=0; j<nc; j++) {
1511:       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1512:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1513:       cnt++;
1514:     }
1515:   }
1516:   /* coupling with process to the right */
1517:   for (i=nx-s; i<nx; i++) {
1518:     for (j=0; j<nc; j++) {
1519:       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1520:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1521:       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1522:         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1523:         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1524:       }
1525:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1526:       cnt++;
1527:     }
1528:   }

1530:   MatSeqAIJSetPreallocation(J,0,cols);
1531:   MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1532:   PetscFree2(cols,ocols);

1534:   DMGetLocalToGlobalMapping(da,&ltog);
1535:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1537:   /*
1538:     For each node in the grid: we get the neighbors in the local (on processor ordering
1539:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1540:     PETSc ordering.
1541:   */
1542:   if (!da->prealloc_only) {
1543:     PetscMalloc1(maxcnt,&cols);
1544:     row = xs*nc;
1545:     /* coupling with process to the left */
1546:     for (i=xs; i<xs+s; i++) {
1547:       for (j=0; j<nc; j++) {
1548:         cnt = 0;
1549:         if (rank) {
1550:           for (l=0; l<s; l++) {
1551:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1552:           }
1553:         }
1554:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1555:           for (l=0; l<s; l++) {
1556:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1557:           }
1558:         }
1559:         if (dfill) {
1560:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1561:             cols[cnt++] = i*nc + dfill[k];
1562:           }
1563:         } else {
1564:           for (k=0; k<nc; k++) {
1565:             cols[cnt++] = i*nc + k;
1566:           }
1567:         }
1568:         for (l=0; l<s; l++) {
1569:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1570:         }
1571:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1572:         row++;
1573:       }
1574:     }
1575:     for (i=xs+s; i<xs+nx-s; i++) {
1576:       for (j=0; j<nc; j++) {
1577:         cnt = 0;
1578:         for (l=0; l<s; l++) {
1579:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1580:         }
1581:         if (dfill) {
1582:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1583:             cols[cnt++] = i*nc + dfill[k];
1584:           }
1585:         } else {
1586:           for (k=0; k<nc; k++) {
1587:             cols[cnt++] = i*nc + k;
1588:           }
1589:         }
1590:         for (l=0; l<s; l++) {
1591:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1592:         }
1593:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1594:         row++;
1595:       }
1596:     }
1597:     /* coupling with process to the right */
1598:     for (i=xs+nx-s; i<xs+nx; i++) {
1599:       for (j=0; j<nc; j++) {
1600:         cnt = 0;
1601:         for (l=0; l<s; l++) {
1602:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1603:         }
1604:         if (dfill) {
1605:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1606:             cols[cnt++] = i*nc + dfill[k];
1607:           }
1608:         } else {
1609:           for (k=0; k<nc; k++) {
1610:             cols[cnt++] = i*nc + k;
1611:           }
1612:         }
1613:         if (rank < size-1) {
1614:           for (l=0; l<s; l++) {
1615:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1616:           }
1617:         }
1618:         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1619:           for (l=0; l<s; l++) {
1620:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1621:           }
1622:         }
1623:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1624:         row++;
1625:       }
1626:     }
1627:     PetscFree(cols);
1628:     /* do not copy values to GPU since they are all zero and not yet needed there */
1629:     MatBindToCPU(J,PETSC_TRUE);
1630:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1631:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1632:     MatBindToCPU(J,PETSC_FALSE);
1633:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1634:   }
1635:   return 0;
1636: }

1638: /* ---------------------------------------------------------------------------------*/

1640: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1641: {
1642:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1643:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1644:   PetscInt               istart,iend;
1645:   DMBoundaryType         bx;
1646:   ISLocalToGlobalMapping ltog,mltog;

1648:   /*
1649:          nc - number of components per grid point
1650:          col - number of colors needed in one direction for single component problem

1652:   */
1653:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1654:   if (bx == DM_BOUNDARY_NONE) {
1655:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1656:   }
1657:   col  = 2*s + 1;

1659:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1660:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1662:   MatSetBlockSize(J,nc);
1663:   MatSeqAIJSetPreallocation(J,col*nc,NULL);
1664:   MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);

1666:   DMGetLocalToGlobalMapping(da,&ltog);
1667:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1668:   if (!mltog) {
1669:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1670:   }

1672:   /*
1673:     For each node in the grid: we get the neighbors in the local (on processor ordering
1674:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1675:     PETSc ordering.
1676:   */
1677:   if (!da->prealloc_only) {
1678:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1679:     for (i=xs; i<xs+nx; i++) {
1680:       istart = PetscMax(-s,gxs - i);
1681:       iend   = PetscMin(s,gxs + gnx - i - 1);
1682:       slot   = i - gxs;

1684:       cnt = 0;
1685:       for (i1=istart; i1<iend+1; i1++) {
1686:         cols[cnt++] = nc*(slot + i1);
1687:         for (l=1; l<nc; l++) {
1688:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1689:         }
1690:       }
1691:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1692:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1693:     }
1694:     /* do not copy values to GPU since they are all zero and not yet needed there */
1695:     MatBindToCPU(J,PETSC_TRUE);
1696:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1697:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1698:     if (bx == DM_BOUNDARY_NONE) {
1699:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1700:     }
1701:     MatBindToCPU(J,PETSC_FALSE);
1702:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1703:     PetscFree2(rows,cols);
1704:   }
1705:   return 0;
1706: }

1708: /* ---------------------------------------------------------------------------------*/

1710: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1711: {
1712:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1713:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1714:   PetscInt               istart,iend;
1715:   DMBoundaryType         bx;
1716:   ISLocalToGlobalMapping ltog,mltog;

1718:   /*
1719:          nc - number of components per grid point
1720:          col - number of colors needed in one direction for single component problem
1721:   */
1722:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1723:   col  = 2*s + 1;

1725:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1726:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1728:   MatSetBlockSize(J,nc);
1729:   MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);

1731:   DMGetLocalToGlobalMapping(da,&ltog);
1732:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1733:   if (!mltog) {
1734:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1735:   }

1737:   /*
1738:     For each node in the grid: we get the neighbors in the local (on processor ordering
1739:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1740:     PETSc ordering.
1741:   */
1742:   if (!da->prealloc_only) {
1743:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1744:     for (i=xs; i<xs+nx; i++) {
1745:       istart = PetscMax(-s,gxs - i);
1746:       iend   = PetscMin(s,gxs + gnx - i - 1);
1747:       slot   = i - gxs;

1749:       cnt = 0;
1750:       for (i1=istart; i1<iend+1; i1++) {
1751:         cols[cnt++] = nc*(slot + i1);
1752:         for (l=1; l<nc; l++) {
1753:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1754:         }
1755:       }
1756:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1757:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1758:     }
1759:     /* do not copy values to GPU since they are all zero and not yet needed there */
1760:     MatBindToCPU(J,PETSC_TRUE);
1761:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1762:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1763:     if (bx == DM_BOUNDARY_NONE) {
1764:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1765:     }
1766:     MatBindToCPU(J,PETSC_FALSE);
1767:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1768:     PetscFree2(rows,cols);
1769:   }
1770:   MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1771:   return 0;
1772: }

1774: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1775: {
1776:   PetscErrorCode         ierr;
1777:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1778:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1779:   PetscInt               istart,iend,jstart,jend,ii,jj;
1780:   MPI_Comm               comm;
1781:   PetscScalar            *values;
1782:   DMBoundaryType         bx,by;
1783:   DMDAStencilType        st;
1784:   ISLocalToGlobalMapping ltog;

1786:   /*
1787:      nc - number of components per grid point
1788:      col - number of colors needed in one direction for single component problem
1789:   */
1790:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1791:   col  = 2*s + 1;

1793:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1794:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1795:   PetscObjectGetComm((PetscObject)da,&comm);

1797:   PetscMalloc1(col*col*nc*nc,&cols);

1799:   DMGetLocalToGlobalMapping(da,&ltog);

1801:   /* determine the matrix preallocation information */
1802:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1803:   for (i=xs; i<xs+nx; i++) {
1804:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1805:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1806:     for (j=ys; j<ys+ny; j++) {
1807:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1808:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1809:       slot   = i - gxs + gnx*(j - gys);

1811:       /* Find block columns in block row */
1812:       cnt = 0;
1813:       for (ii=istart; ii<iend+1; ii++) {
1814:         for (jj=jstart; jj<jend+1; jj++) {
1815:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1816:             cols[cnt++] = slot + ii + gnx*jj;
1817:           }
1818:         }
1819:       }
1820:       MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1821:     }
1822:   }
1823:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1824:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1825:   MatPreallocateFinalize(dnz,onz);

1827:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1829:   /*
1830:     For each node in the grid: we get the neighbors in the local (on processor ordering
1831:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1832:     PETSc ordering.
1833:   */
1834:   if (!da->prealloc_only) {
1835:     PetscCalloc1(col*col*nc*nc,&values);
1836:     for (i=xs; i<xs+nx; i++) {
1837:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1838:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1839:       for (j=ys; j<ys+ny; j++) {
1840:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1841:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1842:         slot = i - gxs + gnx*(j - gys);
1843:         cnt  = 0;
1844:         for (ii=istart; ii<iend+1; ii++) {
1845:           for (jj=jstart; jj<jend+1; jj++) {
1846:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1847:               cols[cnt++] = slot + ii + gnx*jj;
1848:             }
1849:           }
1850:         }
1851:         MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1852:       }
1853:     }
1854:     PetscFree(values);
1855:     /* do not copy values to GPU since they are all zero and not yet needed there */
1856:     MatBindToCPU(J,PETSC_TRUE);
1857:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1858:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1859:     MatBindToCPU(J,PETSC_FALSE);
1860:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1861:   }
1862:   PetscFree(cols);
1863:   return 0;
1864: }

1866: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1867: {
1868:   PetscErrorCode         ierr;
1869:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1870:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1871:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1872:   MPI_Comm               comm;
1873:   PetscScalar            *values;
1874:   DMBoundaryType         bx,by,bz;
1875:   DMDAStencilType        st;
1876:   ISLocalToGlobalMapping ltog;

1878:   /*
1879:          nc - number of components per grid point
1880:          col - number of colors needed in one direction for single component problem

1882:   */
1883:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
1884:   col  = 2*s + 1;

1886:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1887:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1888:   PetscObjectGetComm((PetscObject)da,&comm);

1890:   PetscMalloc1(col*col*col,&cols);

1892:   DMGetLocalToGlobalMapping(da,&ltog);

1894:   /* determine the matrix preallocation information */
1895:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1896:   for (i=xs; i<xs+nx; i++) {
1897:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1898:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1899:     for (j=ys; j<ys+ny; j++) {
1900:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1901:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1902:       for (k=zs; k<zs+nz; k++) {
1903:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1904:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1906:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1908:         /* Find block columns in block row */
1909:         cnt = 0;
1910:         for (ii=istart; ii<iend+1; ii++) {
1911:           for (jj=jstart; jj<jend+1; jj++) {
1912:             for (kk=kstart; kk<kend+1; kk++) {
1913:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1914:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1915:               }
1916:             }
1917:           }
1918:         }
1919:         MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1920:       }
1921:     }
1922:   }
1923:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1924:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1925:   MatPreallocateFinalize(dnz,onz);

1927:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1929:   /*
1930:     For each node in the grid: we get the neighbors in the local (on processor ordering
1931:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1932:     PETSc ordering.
1933:   */
1934:   if (!da->prealloc_only) {
1935:     PetscCalloc1(col*col*col*nc*nc,&values);
1936:     for (i=xs; i<xs+nx; i++) {
1937:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1938:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1939:       for (j=ys; j<ys+ny; j++) {
1940:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1941:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1942:         for (k=zs; k<zs+nz; k++) {
1943:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1944:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1946:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1948:           cnt = 0;
1949:           for (ii=istart; ii<iend+1; ii++) {
1950:             for (jj=jstart; jj<jend+1; jj++) {
1951:               for (kk=kstart; kk<kend+1; kk++) {
1952:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1953:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1954:                 }
1955:               }
1956:             }
1957:           }
1958:           MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1959:         }
1960:       }
1961:     }
1962:     PetscFree(values);
1963:     /* do not copy values to GPU since they are all zero and not yet needed there */
1964:     MatBindToCPU(J,PETSC_TRUE);
1965:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1966:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1967:     MatBindToCPU(J,PETSC_FALSE);
1968:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1969:   }
1970:   PetscFree(cols);
1971:   return 0;
1972: }

1974: /*
1975:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1976:   identify in the local ordering with periodic domain.
1977: */
1978: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1979: {
1980:   PetscInt       i,n;

1982:   ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
1983:   ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
1984:   for (i=0,n=0; i<*cnt; i++) {
1985:     if (col[i] >= *row) col[n++] = col[i];
1986:   }
1987:   *cnt = n;
1988:   return 0;
1989: }

1991: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1992: {
1993:   PetscErrorCode         ierr;
1994:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1995:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1996:   PetscInt               istart,iend,jstart,jend,ii,jj;
1997:   MPI_Comm               comm;
1998:   PetscScalar            *values;
1999:   DMBoundaryType         bx,by;
2000:   DMDAStencilType        st;
2001:   ISLocalToGlobalMapping ltog;

2003:   /*
2004:      nc - number of components per grid point
2005:      col - number of colors needed in one direction for single component problem
2006:   */
2007:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
2008:   col  = 2*s + 1;

2010:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
2011:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
2012:   PetscObjectGetComm((PetscObject)da,&comm);

2014:   PetscMalloc1(col*col*nc*nc,&cols);

2016:   DMGetLocalToGlobalMapping(da,&ltog);

2018:   /* determine the matrix preallocation information */
2019:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2020:   for (i=xs; i<xs+nx; i++) {
2021:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2022:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2023:     for (j=ys; j<ys+ny; j++) {
2024:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2025:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2026:       slot   = i - gxs + gnx*(j - gys);

2028:       /* Find block columns in block row */
2029:       cnt = 0;
2030:       for (ii=istart; ii<iend+1; ii++) {
2031:         for (jj=jstart; jj<jend+1; jj++) {
2032:           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2033:             cols[cnt++] = slot + ii + gnx*jj;
2034:           }
2035:         }
2036:       }
2037:       L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2038:       MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2039:     }
2040:   }
2041:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2042:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2043:   MatPreallocateFinalize(dnz,onz);

2045:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2047:   /*
2048:     For each node in the grid: we get the neighbors in the local (on processor ordering
2049:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2050:     PETSc ordering.
2051:   */
2052:   if (!da->prealloc_only) {
2053:     PetscCalloc1(col*col*nc*nc,&values);
2054:     for (i=xs; i<xs+nx; i++) {
2055:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2056:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2057:       for (j=ys; j<ys+ny; j++) {
2058:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2059:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2060:         slot   = i - gxs + gnx*(j - gys);

2062:         /* Find block columns in block row */
2063:         cnt = 0;
2064:         for (ii=istart; ii<iend+1; ii++) {
2065:           for (jj=jstart; jj<jend+1; jj++) {
2066:             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2067:               cols[cnt++] = slot + ii + gnx*jj;
2068:             }
2069:           }
2070:         }
2071:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2072:         MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2073:       }
2074:     }
2075:     PetscFree(values);
2076:     /* do not copy values to GPU since they are all zero and not yet needed there */
2077:     MatBindToCPU(J,PETSC_TRUE);
2078:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2079:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2080:     MatBindToCPU(J,PETSC_FALSE);
2081:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2082:   }
2083:   PetscFree(cols);
2084:   return 0;
2085: }

2087: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2088: {
2089:   PetscErrorCode         ierr;
2090:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2091:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2092:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2093:   MPI_Comm               comm;
2094:   PetscScalar            *values;
2095:   DMBoundaryType         bx,by,bz;
2096:   DMDAStencilType        st;
2097:   ISLocalToGlobalMapping ltog;

2099:   /*
2100:      nc - number of components per grid point
2101:      col - number of colors needed in one direction for single component problem
2102:   */
2103:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
2104:   col  = 2*s + 1;

2106:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2107:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2108:   PetscObjectGetComm((PetscObject)da,&comm);

2110:   /* create the matrix */
2111:   PetscMalloc1(col*col*col,&cols);

2113:   DMGetLocalToGlobalMapping(da,&ltog);

2115:   /* determine the matrix preallocation information */
2116:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2117:   for (i=xs; i<xs+nx; i++) {
2118:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2119:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2120:     for (j=ys; j<ys+ny; j++) {
2121:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2122:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2123:       for (k=zs; k<zs+nz; k++) {
2124:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2125:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2127:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2129:         /* Find block columns in block row */
2130:         cnt = 0;
2131:         for (ii=istart; ii<iend+1; ii++) {
2132:           for (jj=jstart; jj<jend+1; jj++) {
2133:             for (kk=kstart; kk<kend+1; kk++) {
2134:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2135:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2136:               }
2137:             }
2138:           }
2139:         }
2140:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2141:         MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2142:       }
2143:     }
2144:   }
2145:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2146:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2147:   MatPreallocateFinalize(dnz,onz);

2149:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2151:   /*
2152:     For each node in the grid: we get the neighbors in the local (on processor ordering
2153:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2154:     PETSc ordering.
2155:   */
2156:   if (!da->prealloc_only) {
2157:     PetscCalloc1(col*col*col*nc*nc,&values);
2158:     for (i=xs; i<xs+nx; i++) {
2159:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2160:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2161:       for (j=ys; j<ys+ny; j++) {
2162:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2163:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2164:         for (k=zs; k<zs+nz; k++) {
2165:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2166:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2168:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2170:           cnt = 0;
2171:           for (ii=istart; ii<iend+1; ii++) {
2172:             for (jj=jstart; jj<jend+1; jj++) {
2173:               for (kk=kstart; kk<kend+1; kk++) {
2174:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2175:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2176:                 }
2177:               }
2178:             }
2179:           }
2180:           L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2181:           MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2182:         }
2183:       }
2184:     }
2185:     PetscFree(values);
2186:     /* do not copy values to GPU since they are all zero and not yet needed there */
2187:     MatBindToCPU(J,PETSC_TRUE);
2188:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2189:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2190:     MatBindToCPU(J,PETSC_FALSE);
2191:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2192:   }
2193:   PetscFree(cols);
2194:   return 0;
2195: }

2197: /* ---------------------------------------------------------------------------------*/

2199: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2200: {
2201:   PetscErrorCode         ierr;
2202:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2203:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2204:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2205:   DM_DA                  *dd = (DM_DA*)da->data;
2206:   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2207:   MPI_Comm               comm;
2208:   PetscScalar            *values;
2209:   DMBoundaryType         bx,by,bz;
2210:   ISLocalToGlobalMapping ltog;
2211:   DMDAStencilType        st;
2212:   PetscBool              removedups = PETSC_FALSE;

2214:   /*
2215:          nc - number of components per grid point
2216:          col - number of colors needed in one direction for single component problem

2218:   */
2219:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2220:   col  = 2*s + 1;
2222:                  by 2*stencil_width + 1\n");
2224:                  by 2*stencil_width + 1\n");
2226:                  by 2*stencil_width + 1\n");

2228:   /*
2229:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2230:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2231:   */
2232:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2233:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2234:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

2236:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2237:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2238:   PetscObjectGetComm((PetscObject)da,&comm);

2240:   PetscMalloc1(col*col*col*nc,&cols);
2241:   DMGetLocalToGlobalMapping(da,&ltog);

2243:   /* determine the matrix preallocation information */
2244:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

2246:   MatSetBlockSize(J,nc);
2247:   for (i=xs; i<xs+nx; i++) {
2248:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2249:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2250:     for (j=ys; j<ys+ny; j++) {
2251:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2252:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2253:       for (k=zs; k<zs+nz; k++) {
2254:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2255:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2257:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2259:         for (l=0; l<nc; l++) {
2260:           cnt = 0;
2261:           for (ii=istart; ii<iend+1; ii++) {
2262:             for (jj=jstart; jj<jend+1; jj++) {
2263:               for (kk=kstart; kk<kend+1; kk++) {
2264:                 if (ii || jj || kk) {
2265:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2266:                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2267:                   }
2268:                 } else {
2269:                   if (dfill) {
2270:                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2271:                   } else {
2272:                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2273:                   }
2274:                 }
2275:               }
2276:             }
2277:           }
2278:           row  = l + nc*(slot);
2279:           maxcnt = PetscMax(maxcnt,cnt);
2280:           if (removedups) {
2281:             MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2282:           } else {
2283:             MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2284:           }
2285:         }
2286:       }
2287:     }
2288:   }
2289:   MatSeqAIJSetPreallocation(J,0,dnz);
2290:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2291:   MatPreallocateFinalize(dnz,onz);
2292:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2294:   /*
2295:     For each node in the grid: we get the neighbors in the local (on processor ordering
2296:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2297:     PETSc ordering.
2298:   */
2299:   if (!da->prealloc_only) {
2300:     PetscCalloc1(maxcnt,&values);
2301:     for (i=xs; i<xs+nx; i++) {
2302:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2303:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2304:       for (j=ys; j<ys+ny; j++) {
2305:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2306:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2307:         for (k=zs; k<zs+nz; k++) {
2308:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2309:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2311:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2313:           for (l=0; l<nc; l++) {
2314:             cnt = 0;
2315:             for (ii=istart; ii<iend+1; ii++) {
2316:               for (jj=jstart; jj<jend+1; jj++) {
2317:                 for (kk=kstart; kk<kend+1; kk++) {
2318:                   if (ii || jj || kk) {
2319:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2320:                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2321:                     }
2322:                   } else {
2323:                     if (dfill) {
2324:                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2325:                     } else {
2326:                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2327:                     }
2328:                   }
2329:                 }
2330:               }
2331:             }
2332:             row  = l + nc*(slot);
2333:             MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2334:           }
2335:         }
2336:       }
2337:     }
2338:     PetscFree(values);
2339:     /* do not copy values to GPU since they are all zero and not yet needed there */
2340:     MatBindToCPU(J,PETSC_TRUE);
2341:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2342:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2343:     MatBindToCPU(J,PETSC_FALSE);
2344:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2345:   }
2346:   PetscFree(cols);
2347:   return 0;
2348: }