Actual source code: bddcschurs.c

  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <../src/mat/impls/dense/seq/dense.h>
  4: #include <petscblaslapack.h>

  6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
  7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
  8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
  9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);

 11: /* if v2 is not present, correction is done in-place */
 12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
 13: {
 14:   PetscScalar    *array;
 15:   PetscScalar    *array2;

 17:   if (!ctx->benign_n) return 0;
 18:   if (sol && full) {
 19:     PetscInt n_I,size_schur;

 21:     /* get sizes */
 22:     MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
 23:     VecGetSize(v,&n_I);
 24:     n_I = n_I - size_schur;
 25:     /* get schur sol from array */
 26:     VecGetArray(v,&array);
 27:     VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
 28:     VecRestoreArray(v,&array);
 29:     /* apply interior sol correction */
 30:     MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work);
 31:     VecResetArray(ctx->benign_dummy_schur_vec);
 32:     MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v);
 33:   }
 34:   if (v2) {
 35:     PetscInt nl;

 37:     VecGetArrayRead(v,(const PetscScalar**)&array);
 38:     VecGetLocalSize(v2,&nl);
 39:     VecGetArray(v2,&array2);
 40:     PetscArraycpy(array2,array,nl);
 41:   } else {
 42:     VecGetArray(v,&array);
 43:     array2 = array;
 44:   }
 45:   if (!sol) { /* change rhs */
 46:     PetscInt n;
 47:     for (n=0;n<ctx->benign_n;n++) {
 48:       PetscScalar    sum = 0.;
 49:       const PetscInt *cols;
 50:       PetscInt       nz,i;

 52:       ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
 53:       ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
 54:       for (i=0;i<nz-1;i++) sum += array[cols[i]];
 55: #if defined(PETSC_USE_COMPLEX)
 56:       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
 57: #else
 58:       sum = -sum/nz;
 59: #endif
 60:       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
 61:       ctx->benign_save_vals[n] = array2[cols[nz-1]];
 62:       array2[cols[nz-1]] = sum;
 63:       ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
 64:     }
 65:   } else {
 66:     PetscInt n;
 67:     for (n=0;n<ctx->benign_n;n++) {
 68:       PetscScalar    sum = 0.;
 69:       const PetscInt *cols;
 70:       PetscInt       nz,i;
 71:       ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
 72:       ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
 73:       for (i=0;i<nz-1;i++) sum += array[cols[i]];
 74: #if defined(PETSC_USE_COMPLEX)
 75:       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
 76: #else
 77:       sum = -sum/nz;
 78: #endif
 79:       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
 80:       array2[cols[nz-1]] = ctx->benign_save_vals[n];
 81:       ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
 82:     }
 83:   }
 84:   if (v2) {
 85:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
 86:     VecRestoreArray(v2,&array2);
 87:   } else {
 88:     VecRestoreArray(v,&array);
 89:   }
 90:   if (!sol && full) {
 91:     Vec      usedv;
 92:     PetscInt n_I,size_schur;

 94:     /* get sizes */
 95:     MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
 96:     VecGetSize(v,&n_I);
 97:     n_I = n_I - size_schur;
 98:     /* compute schur rhs correction */
 99:     if (v2) {
100:       usedv = v2;
101:     } else {
102:       usedv = v;
103:     }
104:     /* apply schur rhs correction */
105:     MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work);
106:     VecGetArrayRead(usedv,(const PetscScalar**)&array);
107:     VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
108:     VecRestoreArrayRead(usedv,(const PetscScalar**)&array);
109:     MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec);
110:     VecResetArray(ctx->benign_dummy_schur_vec);
111:   }
112:   return 0;
113: }

115: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
116: {
117:   PCBDDCReuseSolvers ctx;
118:   PetscBool          copy = PETSC_FALSE;

120:   PCShellGetContext(pc,&ctx);
121:   if (full) {
122: #if defined(PETSC_HAVE_MUMPS)
123:     MatMumpsSetIcntl(ctx->F,26,-1);
124: #endif
125: #if defined(PETSC_HAVE_MKL_PARDISO)
126:     MatMkl_PardisoSetCntl(ctx->F,70,0);
127: #endif
128:     copy = ctx->has_vertices;
129:   } else { /* interior solver */
130: #if defined(PETSC_HAVE_MUMPS)
131:     MatMumpsSetIcntl(ctx->F,26,0);
132: #endif
133: #if defined(PETSC_HAVE_MKL_PARDISO)
134:     MatMkl_PardisoSetCntl(ctx->F,70,1);
135: #endif
136:     copy = PETSC_TRUE;
137:   }
138:   /* copy rhs into factored matrix workspace */
139:   if (copy) {
140:     PetscInt    n;
141:     PetscScalar *array,*array_solver;

143:     VecGetLocalSize(rhs,&n);
144:     VecGetArrayRead(rhs,(const PetscScalar**)&array);
145:     VecGetArray(ctx->rhs,&array_solver);
146:     PetscArraycpy(array_solver,array,n);
147:     VecRestoreArray(ctx->rhs,&array_solver);
148:     VecRestoreArrayRead(rhs,(const PetscScalar**)&array);

150:     PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full);
151:     if (transpose) {
152:       MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
153:     } else {
154:       MatSolve(ctx->F,ctx->rhs,ctx->sol);
155:     }
156:     PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full);

158:     /* get back data to caller worskpace */
159:     VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
160:     VecGetArray(sol,&array);
161:     PetscArraycpy(array,array_solver,n);
162:     VecRestoreArray(sol,&array);
163:     VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
164:   } else {
165:     if (ctx->benign_n) {
166:       PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full);
167:       if (transpose) {
168:         MatSolveTranspose(ctx->F,ctx->rhs,sol);
169:       } else {
170:         MatSolve(ctx->F,ctx->rhs,sol);
171:       }
172:       PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full);
173:     } else {
174:       if (transpose) {
175:         MatSolveTranspose(ctx->F,rhs,sol);
176:       } else {
177:         MatSolve(ctx->F,rhs,sol);
178:       }
179:     }
180:   }
181:   /* restore defaults */
182: #if defined(PETSC_HAVE_MUMPS)
183:   MatMumpsSetIcntl(ctx->F,26,-1);
184: #endif
185: #if defined(PETSC_HAVE_MKL_PARDISO)
186:   MatMkl_PardisoSetCntl(ctx->F,70,0);
187: #endif
188:   return 0;
189: }

191: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
192: {
193:   PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE);
194:   return 0;
195: }

197: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
198: {
199:   PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE);
200:   return 0;
201: }

203: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
204: {
205:   PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE);
206:   return 0;
207: }

209: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
210: {
211:   PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE);
212:   return 0;
213: }

215: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
216: {
217:   PCBDDCReuseSolvers ctx;
218:   PetscBool          iascii;

220:   PCShellGetContext(pc,&ctx);
221:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
222:   if (iascii) {
223:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);
224:   }
225:   MatView(ctx->F,viewer);
226:   if (iascii) {
227:     PetscViewerPopFormat(viewer);
228:   }
229:   return 0;
230: }

232: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
233: {
234:   PetscInt       i;

236:   MatDestroy(&reuse->F);
237:   VecDestroy(&reuse->sol);
238:   VecDestroy(&reuse->rhs);
239:   PCDestroy(&reuse->interior_solver);
240:   PCDestroy(&reuse->correction_solver);
241:   ISDestroy(&reuse->is_R);
242:   ISDestroy(&reuse->is_B);
243:   VecScatterDestroy(&reuse->correction_scatter_B);
244:   VecDestroy(&reuse->sol_B);
245:   VecDestroy(&reuse->rhs_B);
246:   for (i=0;i<reuse->benign_n;i++) {
247:     ISDestroy(&reuse->benign_zerodiag_subs[i]);
248:   }
249:   PetscFree(reuse->benign_zerodiag_subs);
250:   PetscFree(reuse->benign_save_vals);
251:   MatDestroy(&reuse->benign_csAIB);
252:   MatDestroy(&reuse->benign_AIIm1ones);
253:   VecDestroy(&reuse->benign_corr_work);
254:   VecDestroy(&reuse->benign_dummy_schur_vec);
255:   return 0;
256: }

258: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
259: {
260:   Mat            B, C, D, Bd, Cd, AinvBd;
261:   KSP            ksp;
262:   PC             pc;
263:   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
264:   PetscReal      fill = 2.0;
265:   PetscInt       n_I;
266:   PetscMPIInt    size;

268:   MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
270:   if (reuse == MAT_REUSE_MATRIX) {
271:     PetscBool Sdense;

273:     PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
275:   }
276:   MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
277:   MatSchurComplementGetKSP(M, &ksp);
278:   KSPGetPC(ksp, &pc);
279:   PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
280:   PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
281:   PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
282:   PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
283:   PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
284:   MatGetSize(B,&n_I,NULL);
285:   if (n_I) {
286:     if (!Bdense) {
287:       MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
288:     } else {
289:       Bd = B;
290:     }

292:     if (isLU || isILU || isCHOL) {
293:       Mat fact;
294:       KSPSetUp(ksp);
295:       PCFactorGetMatrix(pc, &fact);
296:       MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
297:       MatMatSolve(fact, Bd, AinvBd);
298:     } else {
299:       PetscBool ex = PETSC_TRUE;

301:       if (ex) {
302:         Mat Ainvd;

304:         PCComputeOperator(pc, MATDENSE, &Ainvd);
305:         MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
306:         MatDestroy(&Ainvd);
307:       } else {
308:         Vec         sol,rhs;
309:         PetscScalar *arrayrhs,*arraysol;
310:         PetscInt    i,nrhs,n;

312:         MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
313:         MatGetSize(Bd,&n,&nrhs);
314:         MatDenseGetArray(Bd,&arrayrhs);
315:         MatDenseGetArray(AinvBd,&arraysol);
316:         KSPGetSolution(ksp,&sol);
317:         KSPGetRhs(ksp,&rhs);
318:         for (i=0;i<nrhs;i++) {
319:           VecPlaceArray(rhs,arrayrhs+i*n);
320:           VecPlaceArray(sol,arraysol+i*n);
321:           KSPSolve(ksp,rhs,sol);
322:           VecResetArray(rhs);
323:           VecResetArray(sol);
324:         }
325:         MatDenseRestoreArray(Bd,&arrayrhs);
326:         MatDenseRestoreArray(AinvBd,&arrayrhs);
327:       }
328:     }
329:     if (!Bdense & !issym) {
330:       MatDestroy(&Bd);
331:     }

333:     if (!issym) {
334:       if (!Cdense) {
335:         MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
336:       } else {
337:         Cd = C;
338:       }
339:       MatMatMult(Cd, AinvBd, reuse, fill, S);
340:       if (!Cdense) {
341:         MatDestroy(&Cd);
342:       }
343:     } else {
344:       MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
345:       if (!Bdense) {
346:         MatDestroy(&Bd);
347:       }
348:     }
349:     MatDestroy(&AinvBd);
350:   }

352:   if (D) {
353:     Mat       Dd;
354:     PetscBool Ddense;

356:     PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
357:     if (!Ddense) {
358:       MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
359:     } else {
360:       Dd = D;
361:     }
362:     if (n_I) {
363:       MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
364:     } else {
365:       if (reuse == MAT_INITIAL_MATRIX) {
366:         MatDuplicate(Dd,MAT_COPY_VALUES,S);
367:       } else {
368:         MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
369:       }
370:     }
371:     if (!Ddense) {
372:       MatDestroy(&Dd);
373:     }
374:   } else {
375:     MatScale(*S,-1.0);
376:   }
377:   return 0;
378: }

380: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
381: {
382:   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
383:   Mat                    S_all;
384:   Vec                    gstash,lstash;
385:   VecScatter             sstash;
386:   IS                     is_I,is_I_layer;
387:   IS                     all_subsets,all_subsets_mult,all_subsets_n;
388:   PetscScalar            *stasharray,*Bwork;
389:   PetscInt               *nnz,*all_local_idx_N;
390:   PetscInt               *auxnum1,*auxnum2;
391:   PetscInt               i,subset_size,max_subset_size;
392:   PetscInt               n_B,extra,local_size,global_size;
393:   PetscInt               local_stash_size;
394:   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
395:   MPI_Comm               comm_n;
396:   PetscBool              deluxe = PETSC_TRUE;
397:   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
398:   PetscViewer            matl_dbg_viewer = NULL;
399:   PetscErrorCode         ierr;
400:   PetscBool              flg;

402:   MatDestroy(&sub_schurs->A);
403:   MatDestroy(&sub_schurs->S);
404:   if (Ain) {
405:     PetscObjectReference((PetscObject)Ain);
406:     sub_schurs->A = Ain;
407:   }

409:   PetscObjectReference((PetscObject)Sin);
410:   sub_schurs->S = Sin;
411:   if (sub_schurs->schur_explicit) {
412:     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
413:   }

415:   /* preliminary checks */

418:   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;

420:   /* debug (MATLAB) */
421:   if (sub_schurs->debug) {
422:     PetscMPIInt size,rank;
423:     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;

425:     MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size);
426:     MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
427:     nr   = size;
428:     PetscMalloc1(nr,&print_schurs_ranks);
429:     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
430:     PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg);
431:     if (!flg) print_schurs = PETSC_TRUE;
432:     else {
433:       print_schurs = PETSC_FALSE;
434:       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
435:     }
436:     PetscOptionsEnd();
437:     PetscFree(print_schurs_ranks);
438:     if (print_schurs) {
439:       char filename[256];

441:       PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank);
442:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer);
443:       PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB);
444:     }
445:   }

447:   /* restrict work on active processes */
448:   if (sub_schurs->restrict_comm) {
449:     PetscSubcomm subcomm;
450:     PetscMPIInt  color,rank;

452:     color = 0;
453:     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
454:     MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
455:     PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
456:     PetscSubcommSetNumber(subcomm,2);
457:     PetscSubcommSetTypeGeneral(subcomm,color,rank);
458:     PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
459:     PetscSubcommDestroy(&subcomm);
460:     if (!sub_schurs->n_subs) {
461:       PetscCommDestroy(&comm_n);
462:       return 0;
463:     }
464:   } else {
465:     PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL);
466:   }

468:   /* get Schur complement matrices */
469:   if (!sub_schurs->schur_explicit) {
470:     Mat       tA_IB,tA_BI,tA_BB;
471:     PetscBool isseqsbaij;
472:     MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
473:     PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
474:     if (isseqsbaij) {
475:       MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
476:       MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
477:       MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
478:     } else {
479:       PetscObjectReference((PetscObject)tA_BB);
480:       A_BB = tA_BB;
481:       PetscObjectReference((PetscObject)tA_IB);
482:       A_IB = tA_IB;
483:       PetscObjectReference((PetscObject)tA_BI);
484:       A_BI = tA_BI;
485:     }
486:   } else {
487:     A_II = NULL;
488:     A_IB = NULL;
489:     A_BI = NULL;
490:     A_BB = NULL;
491:   }
492:   S_all = NULL;

494:   /* determine interior problems */
495:   ISGetLocalSize(sub_schurs->is_I,&i);
496:   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
497:     PetscBT                touched;
498:     const PetscInt*        idx_B;
499:     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;

502:     /* get sizes */
503:     ISGetLocalSize(sub_schurs->is_I,&n_I);
504:     ISGetLocalSize(sub_schurs->is_B,&n_B);

506:     PetscMalloc1(n_I+n_B,&local_numbering);
507:     PetscBTCreate(n_I+n_B,&touched);
508:     PetscBTMemzero(n_I+n_B,touched);

510:     /* all boundary dofs must be skipped when adding layers */
511:     ISGetIndices(sub_schurs->is_B,&idx_B);
512:     for (j=0;j<n_B;j++) {
513:       PetscBTSet(touched,idx_B[j]);
514:     }
515:     PetscArraycpy(local_numbering,idx_B,n_B);
516:     ISRestoreIndices(sub_schurs->is_B,&idx_B);

518:     /* add prescribed number of layers of dofs */
519:     n_local_dofs = n_B;
520:     n_prev_added = n_B;
521:     for (layer=0;layer<nlayers;layer++) {
522:       PetscInt n_added = 0;
523:       if (n_local_dofs == n_I+n_B) break;
525:       PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
526:       n_prev_added = n_added;
527:       n_local_dofs += n_added;
528:       if (!n_added) break;
529:     }
530:     PetscBTDestroy(&touched);

532:     /* IS for I layer dofs in original numbering */
533:     ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
534:     PetscFree(local_numbering);
535:     ISSort(is_I_layer);
536:     /* IS for I layer dofs in I numbering */
537:     if (!sub_schurs->schur_explicit) {
538:       ISLocalToGlobalMapping ItoNmap;
539:       ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
540:       ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
541:       ISLocalToGlobalMappingDestroy(&ItoNmap);

543:       /* II block */
544:       MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
545:     }
546:   } else {
547:     PetscInt n_I;

549:     /* IS for I dofs in original numbering */
550:     PetscObjectReference((PetscObject)sub_schurs->is_I);
551:     is_I_layer = sub_schurs->is_I;

553:     /* IS for I dofs in I numbering (strided 1) */
554:     if (!sub_schurs->schur_explicit) {
555:       ISGetSize(sub_schurs->is_I,&n_I);
556:       ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);

558:       /* II block is the same */
559:       PetscObjectReference((PetscObject)A_II);
560:       AE_II = A_II;
561:     }
562:   }

564:   /* Get info on subset sizes and sum of all subsets sizes */
565:   max_subset_size = 0;
566:   local_size = 0;
567:   for (i=0;i<sub_schurs->n_subs;i++) {
568:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
569:     max_subset_size = PetscMax(subset_size,max_subset_size);
570:     local_size += subset_size;
571:   }

573:   /* Work arrays for local indices */
574:   extra = 0;
575:   ISGetLocalSize(sub_schurs->is_B,&n_B);
576:   if (sub_schurs->schur_explicit && is_I_layer) {
577:     ISGetLocalSize(is_I_layer,&extra);
578:   }
579:   PetscMalloc1(n_B+extra,&all_local_idx_N);
580:   if (extra) {
581:     const PetscInt *idxs;
582:     ISGetIndices(is_I_layer,&idxs);
583:     PetscArraycpy(all_local_idx_N,idxs,extra);
584:     ISRestoreIndices(is_I_layer,&idxs);
585:   }
586:   PetscMalloc1(sub_schurs->n_subs,&auxnum1);
587:   PetscMalloc1(sub_schurs->n_subs,&auxnum2);

589:   /* Get local indices in local numbering */
590:   local_size = 0;
591:   local_stash_size = 0;
592:   for (i=0;i<sub_schurs->n_subs;i++) {
593:     const PetscInt *idxs;

595:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
596:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
597:     /* start (smallest in global ordering) and multiplicity */
598:     auxnum1[i] = idxs[0];
599:     auxnum2[i] = subset_size*subset_size;
600:     /* subset indices in local numbering */
601:     PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size);
602:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
603:     local_size += subset_size;
604:     local_stash_size += subset_size*subset_size;
605:   }

607:   /* allocate extra workspace needed only for GETRI or SYTRF */
608:   use_potr = use_sytr = PETSC_FALSE;
609:   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
610:     use_potr = PETSC_TRUE;
611:   } else if (sub_schurs->is_symmetric) {
612:     use_sytr = PETSC_TRUE;
613:   }
614:   if (local_size && !use_potr) {
615:     PetscScalar  lwork,dummyscalar = 0.;
616:     PetscBLASInt dummyint = 0;

618:     B_lwork = -1;
619:     PetscBLASIntCast(local_size,&B_N);
620:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
621:     if (use_sytr) {
622:       PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
624:     } else {
625:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
627:     }
628:     PetscFPTrapPop();
629:     PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
630:     PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
631:   } else {
632:     Bwork = NULL;
633:     pivots = NULL;
634:   }

636:   /* prepare data for summing up properly schurs on subsets */
637:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
638:   ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
639:   ISDestroy(&all_subsets_n);
640:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
641:   ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
642:   ISDestroy(&all_subsets);
643:   ISDestroy(&all_subsets_mult);
644:   ISGetLocalSize(all_subsets_n,&i);
646:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash);
647:   VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash);
648:   VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash);
649:   ISDestroy(&all_subsets_n);

651:   /* subset indices in local boundary numbering */
652:   if (!sub_schurs->is_Ej_all) {
653:     PetscInt *all_local_idx_B;

655:     PetscMalloc1(local_size,&all_local_idx_B);
656:     ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
658:     ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
659:   }

661:   if (change) {
662:     ISLocalToGlobalMapping BtoS;
663:     IS                     change_primal_B;
664:     IS                     change_primal_all;

668:     PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub);
669:     for (i=0;i<sub_schurs->n_subs;i++) {
670:       ISLocalToGlobalMapping NtoS;
671:       ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS);
672:       ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]);
673:       ISLocalToGlobalMappingDestroy(&NtoS);
674:     }
675:     ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B);
676:     ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS);
677:     ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all);
678:     ISLocalToGlobalMappingDestroy(&BtoS);
679:     ISDestroy(&change_primal_B);
680:     PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change);
681:     for (i=0;i<sub_schurs->n_subs;i++) {
682:       Mat change_sub;

684:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
685:       KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]);
686:       KSPSetType(sub_schurs->change[i],KSPPREONLY);
687:       if (!sub_schurs->change_with_qr) {
688:         MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub);
689:       } else {
690:         Mat change_subt;
691:         MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt);
692:         MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub);
693:         MatDestroy(&change_subt);
694:       }
695:       KSPSetOperators(sub_schurs->change[i],change_sub,change_sub);
696:       MatDestroy(&change_sub);
697:       KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix);
698:       KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_");
699:     }
700:     ISDestroy(&change_primal_all);
701:   }

703:   /* Local matrix of all local Schur on subsets (transposed) */
704:   if (!sub_schurs->S_Ej_all) {
705:     Mat         T;
706:     PetscScalar *v;
707:     PetscInt    *ii,*jj;
708:     PetscInt    cum,i,j,k;

710:     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
711:        Allocate properly a representative matrix and duplicate */
712:     PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v);
713:     ii[0] = 0;
714:     cum   = 0;
715:     for (i=0;i<sub_schurs->n_subs;i++) {
716:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
717:       for (j=0;j<subset_size;j++) {
718:         const PetscInt row = cum+j;
719:         PetscInt col = cum;

721:         ii[row+1] = ii[row] + subset_size;
722:         for (k=ii[row];k<ii[row+1];k++) {
723:           jj[k] = col;
724:           col++;
725:         }
726:       }
727:       cum += subset_size;
728:     }
729:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T);
730:     MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all);
731:     MatDestroy(&T);
732:     PetscFree3(ii,jj,v);
733:   }
734:   /* matrices for deluxe scaling and adaptive selection */
735:   if (compute_Stilda) {
736:     if (!sub_schurs->sum_S_Ej_tilda_all) {
737:       MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all);
738:     }
739:     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
740:       MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all);
741:     }
742:   }

744:   /* Compute Schur complements explicitly */
745:   F = NULL;
746:   if (!sub_schurs->schur_explicit) {
747:     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
748:        it is not efficient, unless the economic version of the scaling is used */
749:     Mat         S_Ej_expl;
750:     PetscScalar *work;
751:     PetscInt    j,*dummy_idx;
752:     PetscBool   Sdense;

754:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
755:     local_size = 0;
756:     for (i=0;i<sub_schurs->n_subs;i++) {
757:       IS  is_subset_B;
758:       Mat AE_EE,AE_IE,AE_EI,S_Ej;

760:       /* subsets in original and boundary numbering */
761:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
762:       /* EE block */
763:       MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
764:       /* IE block */
765:       MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
766:       /* EI block */
767:       if (sub_schurs->is_symmetric) {
768:         MatCreateTranspose(AE_IE,&AE_EI);
769:       } else if (sub_schurs->is_hermitian) {
770:         MatCreateHermitianTranspose(AE_IE,&AE_EI);
771:       } else {
772:         MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
773:       }
774:       ISDestroy(&is_subset_B);
775:       MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
776:       MatDestroy(&AE_EE);
777:       MatDestroy(&AE_IE);
778:       MatDestroy(&AE_EI);
779:       if (AE_II == A_II) { /* we can reuse the same ksp */
780:         KSP ksp;
781:         MatSchurComplementGetKSP(sub_schurs->S,&ksp);
782:         MatSchurComplementSetKSP(S_Ej,ksp);
783:       } else { /* build new ksp object which inherits ksp and pc types from the original one */
784:         KSP       origksp,schurksp;
785:         PC        origpc,schurpc;
786:         KSPType   ksp_type;
787:         PetscInt  n_internal;
788:         PetscBool ispcnone;

790:         MatSchurComplementGetKSP(sub_schurs->S,&origksp);
791:         MatSchurComplementGetKSP(S_Ej,&schurksp);
792:         KSPGetType(origksp,&ksp_type);
793:         KSPSetType(schurksp,ksp_type);
794:         KSPGetPC(schurksp,&schurpc);
795:         KSPGetPC(origksp,&origpc);
796:         PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
797:         if (!ispcnone) {
798:           PCType pc_type;
799:           PCGetType(origpc,&pc_type);
800:           PCSetType(schurpc,pc_type);
801:         } else {
802:           PCSetType(schurpc,PCLU);
803:         }
804:         ISGetSize(is_I,&n_internal);
805:         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
806:           MatSolverType solver = NULL;
807:           PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver);
808:           if (solver) {
809:             PCFactorSetMatSolverType(schurpc,solver);
810:           }
811:         }
812:         KSPSetUp(schurksp);
813:       }
814:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
815:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
816:       PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl);
817:       PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
818:       if (Sdense) {
819:         for (j=0;j<subset_size;j++) {
820:           dummy_idx[j]=local_size+j;
821:         }
822:         MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
823:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
824:       MatDestroy(&S_Ej);
825:       MatDestroy(&S_Ej_expl);
826:       local_size += subset_size;
827:     }
828:     PetscFree2(dummy_idx,work);
829:     /* free */
830:     ISDestroy(&is_I);
831:     MatDestroy(&AE_II);
832:     PetscFree(all_local_idx_N);
833:   } else {
834:     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
835:     Vec               Dall = NULL;
836:     IS                is_A_all,*is_p_r = NULL;
837:     MatType           Stype;
838:     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
839:     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
840:     const PetscScalar *rS_data;
841:     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
842:     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
843:     PetscBool         schur_has_vertices,factor_workaround;
844:     PetscBool         use_cholesky;
845: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846:     PetscBool         oldpin;
847: #endif

849:     /* get sizes */
850:     n_I = 0;
851:     if (is_I_layer) {
852:       ISGetLocalSize(is_I_layer,&n_I);
853:     }
854:     economic = PETSC_FALSE;
855:     ISGetLocalSize(sub_schurs->is_I,&cum);
856:     if (cum != n_I) economic = PETSC_TRUE;
857:     MatGetLocalSize(sub_schurs->A,&n,NULL);
858:     size_active_schur = local_size;

860:     /* import scaling vector (wrong formulation if we have 3D edges) */
861:     if (scaling && compute_Stilda) {
862:       const PetscScalar *array;
863:       PetscScalar       *array2;
864:       const PetscInt    *idxs;
865:       PetscInt          i;

867:       ISGetIndices(sub_schurs->is_Ej_all,&idxs);
868:       VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall);
869:       VecGetArrayRead(scaling,&array);
870:       VecGetArray(Dall,&array2);
871:       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
872:       VecRestoreArray(Dall,&array2);
873:       VecRestoreArrayRead(scaling,&array);
874:       ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
875:       deluxe = PETSC_FALSE;
876:     }

878:     /* size active schurs does not count any dirichlet or vertex dof on the interface */
879:     factor_workaround = PETSC_FALSE;
880:     schur_has_vertices = PETSC_FALSE;
881:     cum = n_I+size_active_schur;
882:     if (sub_schurs->is_dir) {
883:       const PetscInt* idxs;
884:       PetscInt        n_dir;

886:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
887:       ISGetIndices(sub_schurs->is_dir,&idxs);
888:       PetscArraycpy(all_local_idx_N+cum,idxs,n_dir);
889:       ISRestoreIndices(sub_schurs->is_dir,&idxs);
890:       cum += n_dir;
891:       factor_workaround = PETSC_TRUE;
892:     }
893:     /* include the primal vertices in the Schur complement */
894:     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
895:       PetscInt n_v;

897:       ISGetLocalSize(sub_schurs->is_vertices,&n_v);
898:       if (n_v) {
899:         const PetscInt* idxs;

901:         ISGetIndices(sub_schurs->is_vertices,&idxs);
902:         PetscArraycpy(all_local_idx_N+cum,idxs,n_v);
903:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
904:         cum += n_v;
905:         factor_workaround = PETSC_TRUE;
906:         schur_has_vertices = PETSC_TRUE;
907:       }
908:     }
909:     size_schur = cum - n_I;
910:     ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all);
911: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
912:     oldpin = sub_schurs->A->boundtocpu;
913:     MatBindToCPU(sub_schurs->A,PETSC_TRUE);
914: #endif
915:     if (cum == n) {
916:       ISSetPermutation(is_A_all);
917:       MatPermute(sub_schurs->A,is_A_all,is_A_all,&A);
918:     } else {
919:       MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
920:     }
921: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
922:     MatBindToCPU(sub_schurs->A,oldpin);
923: #endif
924:     MatSetOptionsPrefix(A,sub_schurs->prefix);
925:     MatAppendOptionsPrefix(A,"sub_schurs_");

927:     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
928:        this is a workaround */
929:     if (benign_n) {
930:       Vec                    v,benign_AIIm1_ones;
931:       ISLocalToGlobalMapping N_to_reor;
932:       IS                     is_p0,is_p0_p;
933:       PetscScalar            *cs_AIB,*AIIm1_data;
934:       PetscInt               sizeA;

936:       ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor);
937:       ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
938:       ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p);
939:       ISDestroy(&is_p0);
940:       MatCreateVecs(A,&v,&benign_AIIm1_ones);
941:       VecGetSize(v,&sizeA);
942:       MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat);
943:       MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat);
944:       MatDenseGetArray(cs_AIB_mat,&cs_AIB);
945:       MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
946:       PetscMalloc1(benign_n,&is_p_r);
947:       /* compute colsum of A_IB restricted to pressures */
948:       for (i=0;i<benign_n;i++) {
949:         const PetscScalar *array;
950:         const PetscInt    *idxs;
951:         PetscInt          j,nz;

953:         ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]);
954:         ISGetLocalSize(is_p_r[i],&nz);
955:         ISGetIndices(is_p_r[i],&idxs);
956:         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
957:         ISRestoreIndices(is_p_r[i],&idxs);
958:         VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
959:         MatMult(A,benign_AIIm1_ones,v);
960:         VecResetArray(benign_AIIm1_ones);
961:         VecGetArrayRead(v,&array);
962:         for (j=0;j<size_schur;j++) {
963: #if defined(PETSC_USE_COMPLEX)
964:           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
965: #else
966:           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
967: #endif
968:         }
969:         VecRestoreArrayRead(v,&array);
970:       }
971:       MatDenseRestoreArray(cs_AIB_mat,&cs_AIB);
972:       MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
973:       VecDestroy(&v);
974:       VecDestroy(&benign_AIIm1_ones);
975:       MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
976:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
977:       MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
978:       MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL);
979:       ISDestroy(&is_p0_p);
980:       ISLocalToGlobalMappingDestroy(&N_to_reor);
981:     }
982:     MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric);
983:     MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
984:     MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);

986:     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
987:     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);

989:     /* when using the benign subspace trick, the local Schur complements are SPD */
990:     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
991:        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
992:     if (benign_trick) {
993:       sub_schurs->is_posdef = PETSC_TRUE;
994:       PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg);
995:       if (flg) use_cholesky = PETSC_FALSE;
996:     }

998:     if (n_I) {
999:       IS        is_schur;
1000:       char      stype[64];
1001:       PetscBool gpu = PETSC_FALSE;

1003:       if (use_cholesky) {
1004:         MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F);
1005:       } else {
1006:         MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F);
1007:       }
1008:       MatSetErrorIfFailure(A,PETSC_TRUE);
1009: #if defined(PETSC_HAVE_MKL_PARDISO)
1010:       if (benign_trick) MatMkl_PardisoSetCntl(F,10,10);
1011: #endif
1012:       /* subsets ordered last */
1013:       ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
1014:       MatFactorSetSchurIS(F,is_schur);
1015:       ISDestroy(&is_schur);

1017:       /* factorization step */
1018:       if (use_cholesky) {
1019:         MatCholeskyFactorSymbolic(F,A,NULL,NULL);
1020: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1021:         MatMumpsSetIcntl(F,19,2);
1022: #endif
1023:         MatCholeskyFactorNumeric(F,A,NULL);
1024:         S_lower_triangular = PETSC_TRUE;
1025:       } else {
1026:         MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
1027: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1028:         MatMumpsSetIcntl(F,19,3);
1029: #endif
1030:         MatLUFactorNumeric(F,A,NULL);
1031:       }
1032:       MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view");

1034:       if (matl_dbg_viewer) {
1035:         Mat S;
1036:         IS  is;

1038:         PetscObjectSetName((PetscObject)A,"A");
1039:         MatView(A,matl_dbg_viewer);
1040:         MatFactorCreateSchurComplement(F,&S,NULL);
1041:         PetscObjectSetName((PetscObject)S,"S");
1042:         MatView(S,matl_dbg_viewer);
1043:         MatDestroy(&S);
1044:         ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is);
1045:         PetscObjectSetName((PetscObject)is,"I");
1046:         ISView(is,matl_dbg_viewer);
1047:         ISDestroy(&is);
1048:         ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is);
1049:         PetscObjectSetName((PetscObject)is,"B");
1050:         ISView(is,matl_dbg_viewer);
1051:         ISDestroy(&is);
1052:         PetscObjectSetName((PetscObject)is_A_all,"IA");
1053:         ISView(is_A_all,matl_dbg_viewer);
1054:       }

1056:       /* get explicit Schur Complement computed during numeric factorization */
1057:       MatFactorGetSchurComplement(F,&S_all,NULL);
1058:       PetscStrncpy(stype,MATSEQDENSE,sizeof(stype));
1059: #if defined(PETSC_HAVE_CUDA)
1060:       PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"");
1061: #endif
1062:       if (gpu) {
1063:         PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype));
1064:       }
1065:       PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL);
1066:       MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all);
1067:       MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef);
1068:       MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian);
1069:       MatGetType(S_all,&Stype);

1071:       /* we can reuse the solvers if we are not using the economic version */
1072:       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1073:       factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1074:       if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1075:         reuse_solvers = factor_workaround = PETSC_FALSE;

1077:       solver_S = PETSC_TRUE;

1079:       /* update the Schur complement with the change of basis on the pressures */
1080:       if (benign_n) {
1081:         const PetscScalar *cs_AIB;
1082:         PetscScalar       *S_data,*AIIm1_data;
1083:         Mat               S2 = NULL,S3 = NULL; /* dbg */
1084:         PetscScalar       *S2_data,*S3_data; /* dbg */
1085:         Vec               v,benign_AIIm1_ones;
1086:         PetscInt          sizeA;

1088:         MatDenseGetArray(S_all,&S_data);
1089:         MatCreateVecs(A,&v,&benign_AIIm1_ones);
1090:         VecGetSize(v,&sizeA);
1091: #if defined(PETSC_HAVE_MUMPS)
1092:         MatMumpsSetIcntl(F,26,0);
1093: #endif
1094: #if defined(PETSC_HAVE_MKL_PARDISO)
1095:         MatMkl_PardisoSetCntl(F,70,1);
1096: #endif
1097:         MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB);
1098:         MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
1099:         if (matl_dbg_viewer) {
1100:           MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2);
1101:           MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3);
1102:           MatDenseGetArray(S2,&S2_data);
1103:           MatDenseGetArray(S3,&S3_data);
1104:         }
1105:         for (i=0;i<benign_n;i++) {
1106:           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1107:           const PetscInt *idxs;
1108:           PetscInt       k,j,nz;
1109:           PetscBLASInt   B_k,B_n;

1111:           PetscCalloc1(benign_n,&sums);
1112:           VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
1113:           VecCopy(benign_AIIm1_ones,v);
1114:           MatSolve(F,v,benign_AIIm1_ones);
1115:           MatMult(A,benign_AIIm1_ones,v);
1116:           VecResetArray(benign_AIIm1_ones);
1117:           /* p0 dofs (eliminated) are excluded from the sums */
1118:           for (k=0;k<benign_n;k++) {
1119:             ISGetLocalSize(is_p_r[k],&nz);
1120:             ISGetIndices(is_p_r[k],&idxs);
1121:             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1122:             ISRestoreIndices(is_p_r[k],&idxs);
1123:           }
1124:           VecGetArrayRead(v,(const PetscScalar**)&array);
1125:           if (matl_dbg_viewer) {
1126:             Vec  vv;
1127:             char name[16];

1129:             VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv);
1130:             PetscSNPrintf(name,sizeof(name),"Pvs%D",i);
1131:             PetscObjectSetName((PetscObject)vv,name);
1132:             VecView(vv,matl_dbg_viewer);
1133:           }
1134:           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1135:           /* cs_AIB already scaled by 1./nz */
1136:           B_k = 1;
1137:           for (k=0;k<benign_n;k++) {
1138:             sum  = sums[k];
1139:             PetscBLASIntCast(size_schur,&B_n);

1141:             if (PetscAbsScalar(sum) == 0.0) continue;
1142:             if (k == i) {
1143:               PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1144:               if (matl_dbg_viewer) {
1145:                 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1146:               }
1147:             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1148:               sum /= 2.0;
1149:               PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1150:               if (matl_dbg_viewer) {
1151:                 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1152:               }
1153:             }
1154:           }
1155:           sum  = 1.;
1156:           PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1157:           if (matl_dbg_viewer) {
1158:             PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1159:           }
1160:           VecRestoreArrayRead(v,(const PetscScalar**)&array);
1161:           /* set p0 entry of AIIm1_ones to zero */
1162:           ISGetLocalSize(is_p_r[i],&nz);
1163:           ISGetIndices(is_p_r[i],&idxs);
1164:           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1165:           ISRestoreIndices(is_p_r[i],&idxs);
1166:           PetscFree(sums);
1167:         }
1168:         VecDestroy(&benign_AIIm1_ones);
1169:         if (matl_dbg_viewer) {
1170:           MatDenseRestoreArray(S2,&S2_data);
1171:           MatDenseRestoreArray(S3,&S3_data);
1172:         }
1173:         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1174:           PetscInt k,j;
1175:           for (k=0;k<size_schur;k++) {
1176:             for (j=k;j<size_schur;j++) {
1177:               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1178:             }
1179:           }
1180:         }

1182:         /* restore defaults */
1183: #if defined(PETSC_HAVE_MUMPS)
1184:         MatMumpsSetIcntl(F,26,-1);
1185: #endif
1186: #if defined(PETSC_HAVE_MKL_PARDISO)
1187:         MatMkl_PardisoSetCntl(F,70,0);
1188: #endif
1189:         MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB);
1190:         MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
1191:         VecDestroy(&v);
1192:         MatDenseRestoreArray(S_all,&S_data);
1193:         if (matl_dbg_viewer) {
1194:           Mat S;

1196:           MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1197:           MatFactorCreateSchurComplement(F,&S,NULL);
1198:           PetscObjectSetName((PetscObject)S,"Sb");
1199:           MatView(S,matl_dbg_viewer);
1200:           MatDestroy(&S);
1201:           PetscObjectSetName((PetscObject)S2,"S2P");
1202:           MatView(S2,matl_dbg_viewer);
1203:           PetscObjectSetName((PetscObject)S3,"S3P");
1204:           MatView(S3,matl_dbg_viewer);
1205:           PetscObjectSetName((PetscObject)cs_AIB_mat,"cs");
1206:           MatView(cs_AIB_mat,matl_dbg_viewer);
1207:           MatFactorGetSchurComplement(F,&S_all,NULL);
1208:         }
1209:         MatDestroy(&S2);
1210:         MatDestroy(&S3);
1211:       }
1212:       if (!reuse_solvers) {
1213:         for (i=0;i<benign_n;i++) {
1214:           ISDestroy(&is_p_r[i]);
1215:         }
1216:         PetscFree(is_p_r);
1217:         MatDestroy(&cs_AIB_mat);
1218:         MatDestroy(&benign_AIIm1_ones_mat);
1219:       }
1220:     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1221:       MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
1222:       MatGetType(S_all,&Stype);
1223:       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1224:       factor_workaround = PETSC_FALSE;
1225:       solver_S = PETSC_FALSE;
1226:     }

1228:     if (reuse_solvers) {
1229:       Mat                A_II,Afake;
1230:       Vec                vec1_B;
1231:       PCBDDCReuseSolvers msolv_ctx;
1232:       PetscInt           n_R;

1234:       if (sub_schurs->reuse_solver) {
1235:         PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1236:       } else {
1237:         PetscNew(&sub_schurs->reuse_solver);
1238:       }
1239:       msolv_ctx = sub_schurs->reuse_solver;
1240:       MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
1241:       PetscObjectReference((PetscObject)F);
1242:       msolv_ctx->F = F;
1243:       MatCreateVecs(F,&msolv_ctx->sol,NULL);
1244:       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1245:       {
1246:         PetscScalar *array;
1247:         PetscInt    n;

1249:         VecGetLocalSize(msolv_ctx->sol,&n);
1250:         VecGetArray(msolv_ctx->sol,&array);
1251:         VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs);
1252:         VecRestoreArray(msolv_ctx->sol,&array);
1253:       }
1254:       msolv_ctx->has_vertices = schur_has_vertices;

1256:       /* interior solver */
1257:       PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver);
1258:       PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
1259:       PCSetType(msolv_ctx->interior_solver,PCSHELL);
1260:       PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)");
1261:       PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
1262:       PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1263:       PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior);
1264:       PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose);

1266:       /* correction solver */
1267:       PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver);
1268:       PCSetType(msolv_ctx->correction_solver,PCSHELL);
1269:       PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)");
1270:       PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
1271:       PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1272:       PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction);
1273:       PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose);

1275:       /* scatter and vecs for Schur complement solver */
1276:       MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
1277:       MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
1278:       if (!schur_has_vertices) {
1279:         ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
1280:         VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
1281:         PetscObjectReference((PetscObject)is_A_all);
1282:         msolv_ctx->is_R = is_A_all;
1283:       } else {
1284:         IS              is_B_all;
1285:         const PetscInt* idxs;
1286:         PetscInt        dual,n_v,n;

1288:         ISGetLocalSize(sub_schurs->is_vertices,&n_v);
1289:         dual = size_schur - n_v;
1290:         ISGetLocalSize(is_A_all,&n);
1291:         ISGetIndices(is_A_all,&idxs);
1292:         ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all);
1293:         ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B);
1294:         ISDestroy(&is_B_all);
1295:         ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all);
1296:         VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B);
1297:         ISDestroy(&is_B_all);
1298:         ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R);
1299:         ISRestoreIndices(is_A_all,&idxs);
1300:       }
1301:       ISGetLocalSize(msolv_ctx->is_R,&n_R);
1302:       MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake);
1303:       MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY);
1304:       MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY);
1305:       PCSetOperators(msolv_ctx->correction_solver,Afake,Afake);
1306:       MatDestroy(&Afake);
1307:       VecDestroy(&vec1_B);

1309:       /* communicate benign info to solver context */
1310:       if (benign_n) {
1311:         PetscScalar *array;

1313:         msolv_ctx->benign_n = benign_n;
1314:         msolv_ctx->benign_zerodiag_subs = is_p_r;
1315:         PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals);
1316:         msolv_ctx->benign_csAIB = cs_AIB_mat;
1317:         MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL);
1318:         VecGetArray(msolv_ctx->benign_corr_work,&array);
1319:         VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec);
1320:         VecRestoreArray(msolv_ctx->benign_corr_work,&array);
1321:         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1322:       }
1323:     } else {
1324:       if (sub_schurs->reuse_solver) {
1325:         PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1326:       }
1327:       PetscFree(sub_schurs->reuse_solver);
1328:     }
1329:     MatDestroy(&A);
1330:     ISDestroy(&is_A_all);

1332:     /* Work arrays */
1333:     PetscMalloc1(max_subset_size*max_subset_size,&work);

1335:     /* S_Ej_all */
1336:     cum = cum2 = 0;
1337:     MatDenseGetArrayRead(S_all,&rS_data);
1338:     MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr);
1339:     if (compute_Stilda) {
1340:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1341:     }
1342:     for (i=0;i<sub_schurs->n_subs;i++) {
1343:       PetscInt j;

1345:       /* get S_E */
1346:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1347:       if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1348:         PetscInt k;
1349:         for (k=0;k<subset_size;k++) {
1350:           for (j=k;j<subset_size;j++) {
1351:             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1352:             work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1353:           }
1354:         }
1355:       } else { /* just copy to workspace */
1356:         PetscInt k;
1357:         for (k=0;k<subset_size;k++) {
1358:           for (j=0;j<subset_size;j++) {
1359:             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1360:           }
1361:         }
1362:       }
1363:       /* insert S_E values */
1364:       if (sub_schurs->change) {
1365:         Mat change_sub,SEj,T;

1367:         /* change basis */
1368:         KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1369:         MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1370:         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1371:           Mat T2;
1372:           MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1373:           MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1374:           MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1375:           MatDestroy(&T2);
1376:         } else {
1377:           MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1378:         }
1379:         MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1380:         MatDestroy(&T);
1381:         MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL);
1382:         MatDestroy(&SEj);
1383:       }
1384:       if (deluxe) {
1385:         PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1386:         /* if adaptivity is requested, invert S_E blocks */
1387:         if (compute_Stilda) {
1388:           Mat               M;
1389:           const PetscScalar *vals;
1390:           PetscBool         isdense,isdensecuda;

1392:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M);
1393:           MatSetOption(M,MAT_SPD,sub_schurs->is_posdef);
1394:           MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian);
1395:           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1396:             MatSetType(M,Stype);
1397:           }
1398:           PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense);
1399:           PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda);
1400:           if (use_cholesky) {
1401:             MatCholeskyFactor(M,NULL,NULL);
1402:           } else {
1403:             MatLUFactor(M,NULL,NULL,NULL);
1404:           }
1405:           if (isdense) {
1406:             MatSeqDenseInvertFactors_Private(M);
1407: #if defined(PETSC_HAVE_CUDA)
1408:           } else if (isdensecuda) {
1409:             MatSeqDenseCUDAInvertFactors_Private(M);
1410: #endif
1411:           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1412:           MatDenseGetArrayRead(M,&vals);
1413:           PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size);
1414:           MatDenseRestoreArrayRead(M,&vals);
1415:           MatDestroy(&M);
1416:         }
1417:       } else if (compute_Stilda) { /* not using deluxe */
1418:         Mat         SEj;
1419:         Vec         D;
1420:         PetscScalar *array;

1422:         MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1423:         VecGetArray(Dall,&array);
1424:         VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D);
1425:         VecRestoreArray(Dall,&array);
1426:         VecShift(D,-1.);
1427:         MatDiagonalScale(SEj,D,D);
1428:         MatDestroy(&SEj);
1429:         VecDestroy(&D);
1430:         PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1431:       }
1432:       cum += subset_size;
1433:       cum2 += subset_size*(size_schur + 1);
1434:       SEj_arr += subset_size*subset_size;
1435:       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1436:     }
1437:     MatDenseRestoreArrayRead(S_all,&rS_data);
1438:     MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr);
1439:     if (compute_Stilda) {
1440:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1441:     }
1442:     if (solver_S) {
1443:       MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1444:     }

1446:     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1447:        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1448: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1449:     if (reuse_solvers) {
1450:       Mat                  St;
1451:       MatFactorSchurStatus st;

1453:       flg  = PETSC_FALSE;
1454:       PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL);
1455:       MatFactorGetSchurComplement(F,&St,&st);
1456:       MatBindToCPU(St,flg);
1457:       MatFactorRestoreSchurComplement(F,&St,st);
1458:     }
1459: #endif

1461:     schur_factor = NULL;
1462:     if (compute_Stilda && size_active_schur) {

1464:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);
1465:       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1466:         PetscArraycpy(SEjinv_arr,work,size_schur*size_schur);
1467:       } else {
1468:         Mat S_all_inv=NULL;

1470:         if (solver_S) {
1471:           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1472:              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1473:           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1474:             PetscScalar *data;
1475:             PetscInt     nd = 0;

1477:             if (!use_potr) {
1478:               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1479:             }
1480:             MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1481:             MatDenseGetArray(S_all_inv,&data);
1482:             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1483:               ISGetLocalSize(sub_schurs->is_dir,&nd);
1484:             }

1486:             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1487:             if (schur_has_vertices) {
1488:               Mat          M;
1489:               PetscScalar *tdata;
1490:               PetscInt     nv = 0, news;

1492:               ISGetLocalSize(sub_schurs->is_vertices,&nv);
1493:               news = size_active_schur + nv;
1494:               PetscCalloc1(news*news,&tdata);
1495:               for (i=0;i<size_active_schur;i++) {
1496:                 PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i);
1497:                 PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv);
1498:               }
1499:               for (i=0;i<nv;i++) {
1500:                 PetscInt k = i+size_active_schur;
1501:                 PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i);
1502:               }

1504:               MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M);
1505:               MatSetOption(M,MAT_SPD,PETSC_TRUE);
1506:               MatCholeskyFactor(M,NULL,NULL);
1507:               /* save the factors */
1508:               cum = 0;
1509:               PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor);
1510:               for (i=0;i<size_active_schur;i++) {
1511:                 PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i);
1512:                 cum += size_active_schur - i;
1513:               }
1514:               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1515:               MatSeqDenseInvertFactors_Private(M);
1516:               /* move back just the active dofs to the Schur complement */
1517:               for (i=0;i<size_active_schur;i++) {
1518:                 PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur);
1519:               }
1520:               PetscFree(tdata);
1521:               MatDestroy(&M);
1522:             } else { /* we can factorize and invert just the activedofs */
1523:               Mat         M;
1524:               PetscScalar *aux;

1526:               PetscMalloc1(nd,&aux);
1527:               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1528:               MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M);
1529:               MatDenseSetLDA(M,size_schur);
1530:               MatSetOption(M,MAT_SPD,PETSC_TRUE);
1531:               MatCholeskyFactor(M,NULL,NULL);
1532:               MatSeqDenseInvertFactors_Private(M);
1533:               MatDestroy(&M);
1534:               MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M);
1535:               MatZeroEntries(M);
1536:               MatDestroy(&M);
1537:               MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M);
1538:               MatDenseSetLDA(M,size_schur);
1539:               MatZeroEntries(M);
1540:               MatDestroy(&M);
1541:               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1542:               PetscFree(aux);
1543:             }
1544:             MatDenseRestoreArray(S_all_inv,&data);
1545:           } else { /* use MatFactor calls to invert S */
1546:             MatFactorInvertSchurComplement(F);
1547:             MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1548:           }
1549:         } else { /* we need to invert explicitly since we are not using MatFactor for S */
1550:           PetscObjectReference((PetscObject)S_all);
1551:           S_all_inv = S_all;
1552:           MatDenseGetArray(S_all_inv,&S_data);
1553:           PetscBLASIntCast(size_schur,&B_N);
1554:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1555:           if (use_potr) {
1556:             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1558:             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1560:           } else if (use_sytr) {
1561:             PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1563:             PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1565:           } else {
1566:             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1568:             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1570:           }
1571:           PetscLogFlops(1.0*size_schur*size_schur*size_schur);
1572:           PetscFPTrapPop();
1573:           MatDenseRestoreArray(S_all_inv,&S_data);
1574:         }
1575:         /* S_Ej_tilda_all */
1576:         cum = cum2 = 0;
1577:         MatDenseGetArrayRead(S_all_inv,&rS_data);
1578:         for (i=0;i<sub_schurs->n_subs;i++) {
1579:           PetscInt j;

1581:           ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1582:           /* get (St^-1)_E */
1583:           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1584:              will be properly accessed later during adaptive selection */
1585:           if (S_lower_triangular) {
1586:             PetscInt k;
1587:             if (sub_schurs->change) {
1588:               for (k=0;k<subset_size;k++) {
1589:                 for (j=k;j<subset_size;j++) {
1590:                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1591:                   work[j*subset_size+k] = work[k*subset_size+j];
1592:                 }
1593:               }
1594:             } else {
1595:               for (k=0;k<subset_size;k++) {
1596:                 for (j=k;j<subset_size;j++) {
1597:                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1598:                 }
1599:               }
1600:             }
1601:           } else {
1602:             PetscInt k;
1603:             for (k=0;k<subset_size;k++) {
1604:               for (j=0;j<subset_size;j++) {
1605:                 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1606:               }
1607:             }
1608:           }
1609:           if (sub_schurs->change) {
1610:             Mat change_sub,SEj,T;

1612:             /* change basis */
1613:             KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1614:             MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1615:             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1616:               Mat T2;
1617:               MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1618:               MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1619:               MatDestroy(&T2);
1620:               MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1621:             } else {
1622:               MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1623:             }
1624:             MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1625:             MatDestroy(&T);
1626:             /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1627:             MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL);
1628:             MatDestroy(&SEj);
1629:           }
1630:           PetscArraycpy(SEjinv_arr,work,subset_size*subset_size);
1631:           cum += subset_size;
1632:           cum2 += subset_size*(size_schur + 1);
1633:           SEjinv_arr += subset_size*subset_size;
1634:         }
1635:         MatDenseRestoreArrayRead(S_all_inv,&rS_data);
1636:         if (solver_S) {
1637:           if (schur_has_vertices) {
1638:             MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED);
1639:           } else {
1640:             MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED);
1641:           }
1642:         }
1643:         MatDestroy(&S_all_inv);
1644:       }
1645:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);

1647:       /* move back factors if needed */
1648:       if (schur_has_vertices) {
1649:         Mat      S_tmp;
1650:         PetscInt nd = 0;

1653:         MatFactorGetSchurComplement(F,&S_tmp,NULL);
1654:         if (use_potr) {
1655:           PetscScalar *data;

1657:           MatDenseGetArray(S_tmp,&data);
1658:           PetscArrayzero(data,size_schur*size_schur);

1660:           if (S_lower_triangular) {
1661:             cum = 0;
1662:             for (i=0;i<size_active_schur;i++) {
1663:               PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i);
1664:               cum += size_active_schur-i;
1665:             }
1666:           } else {
1667:             PetscArraycpy(data,schur_factor,size_schur*size_schur);
1668:           }
1669:           if (sub_schurs->is_dir) {
1670:             ISGetLocalSize(sub_schurs->is_dir,&nd);
1671:             for (i=0;i<nd;i++) {
1672:               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1673:             }
1674:           }
1675:           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1676:              set the diagonal entry of the Schur factor to a very large value */
1677:           for (i=size_active_schur+nd;i<size_schur;i++) {
1678:             data[i*(size_schur+1)] = infty;
1679:           }
1680:           MatDenseRestoreArray(S_tmp,&data);
1681:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1682:         MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED);
1683:       }
1684:     } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1685:       PetscScalar *data;
1686:       PetscInt    nd = 0;

1688:       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1689:         ISGetLocalSize(sub_schurs->is_dir,&nd);
1690:       }
1691:       MatFactorGetSchurComplement(F,&S_all,NULL);
1692:       MatDenseGetArray(S_all,&data);
1693:       for (i=0;i<size_active_schur;i++) {
1694:         PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1695:       }
1696:       for (i=size_active_schur+nd;i<size_schur;i++) {
1697:         PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1698:         data[i*(size_schur+1)] = infty;
1699:       }
1700:       MatDenseRestoreArray(S_all,&data);
1701:       MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1702:     }
1703:     PetscFree(work);
1704:     PetscFree(schur_factor);
1705:     VecDestroy(&Dall);
1706:   }
1707:   ISDestroy(&is_I_layer);
1708:   MatDestroy(&S_all);
1709:   MatDestroy(&A_BB);
1710:   MatDestroy(&A_IB);
1711:   MatDestroy(&A_BI);
1712:   MatDestroy(&F);

1714:   PetscMalloc1(sub_schurs->n_subs,&nnz);
1715:   for (i=0;i<sub_schurs->n_subs;i++) {
1716:     ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]);
1717:   }
1718:   ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer);
1719:   MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz);
1720:   MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1721:   MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1722:   if (compute_Stilda) {
1723:     MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz);
1724:     MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1725:     MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1726:     if (deluxe) {
1727:       MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz);
1728:       MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1729:       MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1730:     }
1731:   }
1732:   ISDestroy(&is_I_layer);

1734:   /* Get local part of (\sum_j S_Ej) */
1735:   if (!sub_schurs->sum_S_Ej_all) {
1736:     MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all);
1737:   }
1738:   VecSet(gstash,0.0);
1739:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray);
1740:   VecPlaceArray(lstash,stasharray);
1741:   VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1742:   VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1743:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray);
1744:   VecResetArray(lstash);
1745:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray);
1746:   VecPlaceArray(lstash,stasharray);
1747:   VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1748:   VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1749:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray);
1750:   VecResetArray(lstash);

1752:   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1753:   if (compute_Stilda) {
1754:     VecSet(gstash,0.0);
1755:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1756:     VecPlaceArray(lstash,stasharray);
1757:     VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1758:     VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1759:     VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1760:     VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1761:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1762:     VecResetArray(lstash);
1763:     if (deluxe) {
1764:       VecSet(gstash,0.0);
1765:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1766:       VecPlaceArray(lstash,stasharray);
1767:       VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1768:       VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1769:       VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1770:       VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1771:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1772:       VecResetArray(lstash);
1773:     } else {
1774:       PetscScalar *array;
1775:       PetscInt    cum;

1777:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1778:       cum = 0;
1779:       for (i=0;i<sub_schurs->n_subs;i++) {
1780:         ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1781:         PetscBLASIntCast(subset_size,&B_N);
1782:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1783:         if (use_potr) {
1784:           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1786:           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1788:         } else if (use_sytr) {
1789:           PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1791:           PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1793:         } else {
1794:           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1796:           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1798:         }
1799:         PetscLogFlops(1.0*subset_size*subset_size*subset_size);
1800:         PetscFPTrapPop();
1801:         cum += subset_size*subset_size;
1802:       }
1803:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1804:       PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all);
1805:       MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1806:       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1807:     }
1808:   }
1809:   VecDestroy(&lstash);
1810:   VecDestroy(&gstash);
1811:   VecScatterDestroy(&sstash);

1813:   if (matl_dbg_viewer) {
1814:     PetscInt cum;

1816:     if (sub_schurs->S_Ej_all) {
1817:       PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE");
1818:       MatView(sub_schurs->S_Ej_all,matl_dbg_viewer);
1819:     }
1820:     if (sub_schurs->sum_S_Ej_all) {
1821:       PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE");
1822:       MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer);
1823:     }
1824:     if (sub_schurs->sum_S_Ej_inv_all) {
1825:       PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm");
1826:       MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer);
1827:     }
1828:     if (sub_schurs->sum_S_Ej_tilda_all) {
1829:       PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt");
1830:       MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer);
1831:     }
1832:     for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1833:       IS   is;
1834:       char name[16];

1836:       PetscSNPrintf(name,sizeof(name),"IE%D",i);
1837:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1838:       ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is);
1839:       PetscObjectSetName((PetscObject)is,name);
1840:       ISView(is,matl_dbg_viewer);
1841:       ISDestroy(&is);
1842:       cum += subset_size;
1843:     }
1844:   }

1846:   /* free workspace */
1847:   if (matl_dbg_viewer) PetscViewerFlush(matl_dbg_viewer);
1848:   if (sub_schurs->debug) MPI_Barrier(comm_n);
1849:   PetscViewerDestroy(&matl_dbg_viewer);
1850:   PetscFree2(Bwork,pivots);
1851:   PetscCommDestroy(&comm_n);
1852:   return 0;
1853: }

1855: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1856: {
1857:   IS              *faces,*edges,*all_cc,vertices;
1858:   PetscInt        i,n_faces,n_edges,n_all_cc;
1859:   PetscBool       is_sorted,ispardiso,ismumps;
1860:   PetscErrorCode  ierr;

1862:   ISSorted(is_I,&is_sorted);
1864:   ISSorted(is_B,&is_sorted);

1867:   /* reset any previous data */
1868:   PCBDDCSubSchursReset(sub_schurs);

1870:   /* get index sets for faces and edges (already sorted by global ordering) */
1871:   PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1872:   n_all_cc = n_faces+n_edges;
1873:   PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1874:   PetscMalloc1(n_all_cc,&all_cc);
1875:   for (i=0;i<n_faces;i++) {
1876:     if (copycc) {
1877:       ISDuplicate(faces[i],&all_cc[i]);
1878:     } else {
1879:       PetscObjectReference((PetscObject)faces[i]);
1880:       all_cc[i] = faces[i];
1881:     }
1882:   }
1883:   for (i=0;i<n_edges;i++) {
1884:     if (copycc) {
1885:       ISDuplicate(edges[i],&all_cc[n_faces+i]);
1886:     } else {
1887:       PetscObjectReference((PetscObject)edges[i]);
1888:       all_cc[n_faces+i] = edges[i];
1889:     }
1890:     PetscBTSet(sub_schurs->is_edge,n_faces+i);
1891:   }
1892:   PetscObjectReference((PetscObject)vertices);
1893:   sub_schurs->is_vertices = vertices;
1894:   PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1895:   sub_schurs->is_dir = NULL;
1896:   PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);

1898:   /* Determine if MatFactor can be used */
1899:   PetscStrallocpy(prefix,&sub_schurs->prefix);
1900: #if defined(PETSC_HAVE_MUMPS)
1901:   PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type));
1902: #elif defined(PETSC_HAVE_MKL_PARDISO)
1903:   PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type));
1904: #else
1905:   PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type));
1906: #endif
1907: #if defined(PETSC_USE_COMPLEX)
1908:   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1909: #else
1910:   sub_schurs->is_hermitian  = PETSC_TRUE;
1911: #endif
1912:   sub_schurs->is_posdef     = PETSC_TRUE;
1913:   sub_schurs->is_symmetric  = PETSC_TRUE;
1914:   sub_schurs->debug         = PETSC_FALSE;
1915:   sub_schurs->restrict_comm = PETSC_FALSE;
1916:   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
1917:   PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL);
1918:   PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL);
1919:   PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL);
1920:   PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL);
1921:   PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL);
1922:   PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL);
1923:   PetscOptionsEnd();
1924:   PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps);
1925:   PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso);
1926:   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);

1928:   /* for reals, symmetric and hermitian are synonims */
1929: #if !defined(PETSC_USE_COMPLEX)
1930:   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1931:   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1932: #endif

1934:   PetscObjectReference((PetscObject)is_I);
1935:   sub_schurs->is_I = is_I;
1936:   PetscObjectReference((PetscObject)is_B);
1937:   sub_schurs->is_B = is_B;
1938:   PetscObjectReference((PetscObject)graph->l2gmap);
1939:   sub_schurs->l2gmap = graph->l2gmap;
1940:   PetscObjectReference((PetscObject)BtoNmap);
1941:   sub_schurs->BtoNmap = BtoNmap;
1942:   sub_schurs->n_subs = n_all_cc;
1943:   sub_schurs->is_subs = all_cc;
1944:   sub_schurs->S_Ej_all = NULL;
1945:   sub_schurs->sum_S_Ej_all = NULL;
1946:   sub_schurs->sum_S_Ej_inv_all = NULL;
1947:   sub_schurs->sum_S_Ej_tilda_all = NULL;
1948:   sub_schurs->is_Ej_all = NULL;
1949:   return 0;
1950: }

1952: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1953: {
1954:   PCBDDCSubSchurs schurs_ctx;

1956:   PetscNew(&schurs_ctx);
1957:   schurs_ctx->n_subs = 0;
1958:   *sub_schurs = schurs_ctx;
1959:   return 0;
1960: }

1962: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1963: {
1964:   PetscInt       i;

1966:   if (!sub_schurs) return 0;
1967:   PetscFree(sub_schurs->prefix);
1968:   MatDestroy(&sub_schurs->A);
1969:   MatDestroy(&sub_schurs->S);
1970:   ISDestroy(&sub_schurs->is_I);
1971:   ISDestroy(&sub_schurs->is_B);
1972:   ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1973:   ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1974:   MatDestroy(&sub_schurs->S_Ej_all);
1975:   MatDestroy(&sub_schurs->sum_S_Ej_all);
1976:   MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1977:   MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1978:   ISDestroy(&sub_schurs->is_Ej_all);
1979:   ISDestroy(&sub_schurs->is_vertices);
1980:   ISDestroy(&sub_schurs->is_dir);
1981:   PetscBTDestroy(&sub_schurs->is_edge);
1982:   for (i=0;i<sub_schurs->n_subs;i++) {
1983:     ISDestroy(&sub_schurs->is_subs[i]);
1984:   }
1985:   if (sub_schurs->n_subs) {
1986:     PetscFree(sub_schurs->is_subs);
1987:   }
1988:   if (sub_schurs->reuse_solver) {
1989:     PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1990:   }
1991:   PetscFree(sub_schurs->reuse_solver);
1992:   if (sub_schurs->change) {
1993:     for (i=0;i<sub_schurs->n_subs;i++) {
1994:       KSPDestroy(&sub_schurs->change[i]);
1995:       ISDestroy(&sub_schurs->change_primal_sub[i]);
1996:     }
1997:   }
1998:   PetscFree(sub_schurs->change);
1999:   PetscFree(sub_schurs->change_primal_sub);
2000:   sub_schurs->n_subs = 0;
2001:   return 0;
2002: }

2004: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2005: {
2006:   PCBDDCSubSchursReset(*sub_schurs);
2007:   PetscFree(*sub_schurs);
2008:   return 0;
2009: }

2011: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2012: {
2013:   PetscInt       i,j,n;

2015:   n = 0;
2016:   for (i=-n_prev;i<0;i++) {
2017:     PetscInt start_dof = queue_tip[i];
2018:     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2019:       PetscInt dof = adjncy[j];
2020:       if (!PetscBTLookup(touched,dof)) {
2021:         PetscBTSet(touched,dof);
2022:         queue_tip[n] = dof;
2023:         n++;
2024:       }
2025:     }
2026:   }
2027:   *n_added = n;
2028:   return 0;
2029: }