Actual source code: bddcprivate.c

petsc-3.15.0 2021-03-30
Report Typos and Errors
  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/ksp/pc/impls/bddc/bddc.h>
  3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>
  5: #include <petscdmplex.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/sfimpl.h>
  8: #include <petsc/private/dmpleximpl.h>
  9: #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17:   PetscScalar    *uwork,*data,*U, ds = 0.;
 18:   PetscReal      *sing;
 19:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 20:   PetscInt       ulw,i,nr,nc,n;
 22: #if defined(PETSC_USE_COMPLEX)
 23:   PetscReal      *rwork2;
 24: #endif

 27:   MatGetSize(A,&nr,&nc);
 28:   if (!nr || !nc) return(0);

 30:   /* workspace */
 31:   if (!work) {
 32:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 33:     PetscMalloc1(ulw,&uwork);
 34:   } else {
 35:     ulw   = lw;
 36:     uwork = work;
 37:   }
 38:   n = PetscMin(nr,nc);
 39:   if (!rwork) {
 40:     PetscMalloc1(n,&sing);
 41:   } else {
 42:     sing = rwork;
 43:   }

 45:   /* SVD */
 46:   PetscMalloc1(nr*nr,&U);
 47:   PetscBLASIntCast(nr,&bM);
 48:   PetscBLASIntCast(nc,&bN);
 49:   PetscBLASIntCast(ulw,&lwork);
 50:   MatDenseGetArray(A,&data);
 51:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 52: #if !defined(PETSC_USE_COMPLEX)
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54: #else
 55:   PetscMalloc1(5*n,&rwork2);
 56:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr));
 57:   PetscFree(rwork2);
 58: #endif
 59:   PetscFPTrapPop();
 60:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 61:   MatDenseRestoreArray(A,&data);
 62:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 63:   if (!rwork) {
 64:     PetscFree(sing);
 65:   }
 66:   if (!work) {
 67:     PetscFree(uwork);
 68:   }
 69:   /* create B */
 70:   if (!range) {
 71:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 72:     MatDenseGetArray(*B,&data);
 73:     PetscArraycpy(data,U+nr*i,(nr-i)*nr);
 74:   } else {
 75:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 76:     MatDenseGetArray(*B,&data);
 77:     PetscArraycpy(data,U,i*nr);
 78:   }
 79:   MatDenseRestoreArray(*B,&data);
 80:   PetscFree(U);
 81:   return(0);
 82: }

 84: /* TODO REMOVE */
 85: #if defined(PRINT_GDET)
 86: static int inc = 0;
 87: static int lev = 0;
 88: #endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat               GEc;
121:     const PetscScalar *vals;
122:     PetscScalar       v;

124:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
125:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
126:     MatDenseGetArrayRead(GEd,&vals);
127:     /* v    = PetscAbsScalar(vals[0]) */;
128:     v    = 1.;
129:     cvals[0] = vals[0]/v;
130:     cvals[1] = vals[1]/v;
131:     MatDenseRestoreArrayRead(GEd,&vals);
132:     MatScale(*GKins,1./v);
133: #if defined(PRINT_GDET)
134:     {
135:       PetscViewer viewer;
136:       char filename[256];
137:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
138:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
139:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
140:       PetscObjectSetName((PetscObject)GEc,"GEc");
141:       MatView(GEc,viewer);
142:       PetscObjectSetName((PetscObject)(*GKins),"GK");
143:       MatView(*GKins,viewer);
144:       PetscObjectSetName((PetscObject)GEd,"Gproj");
145:       MatView(GEd,viewer);
146:       PetscViewerDestroy(&viewer);
147:     }
148: #endif
149:     MatDestroy(&GEd);
150:     MatDestroy(&GEc);
151:   }

153:   return(0);
154: }

156: PetscErrorCode PCBDDCNedelecSupport(PC pc)
157: {
158:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
159:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
160:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
161:   Vec                    tvec;
162:   PetscSF                sfv;
163:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
164:   MPI_Comm               comm;
165:   IS                     lned,primals,allprimals,nedfieldlocal;
166:   IS                     *eedges,*extrows,*extcols,*alleedges;
167:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
168:   PetscScalar            *vals,*work;
169:   PetscReal              *rwork;
170:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
171:   PetscInt               ne,nv,Lv,order,n,field;
172:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
173:   PetscInt               i,j,extmem,cum,maxsize,nee;
174:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
175:   PetscInt               *sfvleaves,*sfvroots;
176:   PetscInt               *corners,*cedges;
177:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
178:   PetscInt               *emarks;
179:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
180:   PetscErrorCode         ierr;

183:   /* If the discrete gradient is defined for a subset of dofs and global is true,
184:      it assumes G is given in global ordering for all the dofs.
185:      Otherwise, the ordering is global for the Nedelec field */
186:   order      = pcbddc->nedorder;
187:   conforming = pcbddc->conforming;
188:   field      = pcbddc->nedfield;
189:   global     = pcbddc->nedglobal;
190:   setprimal  = PETSC_FALSE;
191:   print      = PETSC_FALSE;
192:   singular   = PETSC_FALSE;

194:   /* Command line customization */
195:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
196:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
197:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
198:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
199:   /* print debug info TODO: to be removed */
200:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
201:   PetscOptionsEnd();

203:   /* Return if there are no edges in the decomposition and the problem is not singular */
204:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
205:   ISLocalToGlobalMappingGetSize(al2g,&n);
206:   PetscObjectGetComm((PetscObject)pc,&comm);
207:   if (!singular) {
208:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
209:     lrc[0] = PETSC_FALSE;
210:     for (i=0;i<n;i++) {
211:       if (PetscRealPart(vals[i]) > 2.) {
212:         lrc[0] = PETSC_TRUE;
213:         break;
214:       }
215:     }
216:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
217:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
218:     if (!lrc[1]) return(0);
219:   }

221:   /* Get Nedelec field */
222:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
223:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
224:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
225:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
226:     ISGetLocalSize(nedfieldlocal,&ne);
227:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
228:     ne            = n;
229:     nedfieldlocal = NULL;
230:     global        = PETSC_TRUE;
231:   } else if (field == PETSC_DECIDE) {
232:     PetscInt rst,ren,*idx;

234:     PetscArrayzero(matis->sf_leafdata,n);
235:     PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
236:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
237:     for (i=rst;i<ren;i++) {
238:       PetscInt nc;

240:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
242:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:     }
244:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
245:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
246:     PetscMalloc1(n,&idx);
247:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
248:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
249:   } else {
250:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
251:   }

253:   /* Sanity checks */
254:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
255:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
256:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);

258:   /* Just set primal dofs and return */
259:   if (setprimal) {
260:     IS       enedfieldlocal;
261:     PetscInt *eidxs;

263:     PetscMalloc1(ne,&eidxs);
264:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
265:     if (nedfieldlocal) {
266:       ISGetIndices(nedfieldlocal,&idxs);
267:       for (i=0,cum=0;i<ne;i++) {
268:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
269:           eidxs[cum++] = idxs[i];
270:         }
271:       }
272:       ISRestoreIndices(nedfieldlocal,&idxs);
273:     } else {
274:       for (i=0,cum=0;i<ne;i++) {
275:         if (PetscRealPart(vals[i]) > 2.) {
276:           eidxs[cum++] = i;
277:         }
278:       }
279:     }
280:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
281:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
282:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
283:     PetscFree(eidxs);
284:     ISDestroy(&nedfieldlocal);
285:     ISDestroy(&enedfieldlocal);
286:     return(0);
287:   }

289:   /* Compute some l2g maps */
290:   if (nedfieldlocal) {
291:     IS is;

293:     /* need to map from the local Nedelec field to local numbering */
294:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
295:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
296:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
297:     ISLocalToGlobalMappingCreateIS(is,&al2g);
298:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
299:     if (global) {
300:       PetscObjectReference((PetscObject)al2g);
301:       el2g = al2g;
302:     } else {
303:       IS gis;

305:       ISRenumber(is,NULL,NULL,&gis);
306:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
307:       ISDestroy(&gis);
308:     }
309:     ISDestroy(&is);
310:   } else {
311:     /* restore default */
312:     pcbddc->nedfield = -1;
313:     /* one ref for the destruction of al2g, one for el2g */
314:     PetscObjectReference((PetscObject)al2g);
315:     PetscObjectReference((PetscObject)al2g);
316:     el2g = al2g;
317:     fl2g = NULL;
318:   }

320:   /* Start communication to drop connections for interior edges (for cc analysis only) */
321:   PetscArrayzero(matis->sf_leafdata,n);
322:   PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
323:   if (nedfieldlocal) {
324:     ISGetIndices(nedfieldlocal,&idxs);
325:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
326:     ISRestoreIndices(nedfieldlocal,&idxs);
327:   } else {
328:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
329:   }
330:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

333:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
334:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
335:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
336:     if (global) {
337:       PetscInt rst;

339:       MatGetOwnershipRange(G,&rst,NULL);
340:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
341:         if (matis->sf_rootdata[i] < 2) {
342:           matis->sf_rootdata[cum++] = i + rst;
343:         }
344:       }
345:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
346:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
347:     } else {
348:       PetscInt *tbz;

350:       PetscMalloc1(ne,&tbz);
351:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
352:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
353:       ISGetIndices(nedfieldlocal,&idxs);
354:       for (i=0,cum=0;i<ne;i++)
355:         if (matis->sf_leafdata[idxs[i]] == 1)
356:           tbz[cum++] = i;
357:       ISRestoreIndices(nedfieldlocal,&idxs);
358:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
359:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
360:       PetscFree(tbz);
361:     }
362:   } else { /* we need the entire G to infer the nullspace */
363:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
364:     G    = pcbddc->discretegradient;
365:   }

367:   /* Extract subdomain relevant rows of G */
368:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
369:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
370:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
371:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
372:   ISDestroy(&lned);
373:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
374:   MatDestroy(&lGall);
375:   MatISGetLocalMat(lGis,&lG);

377:   /* SF for nodal dofs communications */
378:   MatGetLocalSize(G,NULL,&Lv);
379:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
380:   PetscObjectReference((PetscObject)vl2g);
381:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
382:   PetscSFCreate(comm,&sfv);
383:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
384:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
385:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
386:   i    = singular ? 2 : 1;
387:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

389:   /* Destroy temporary G created in MATIS format and modified G */
390:   PetscObjectReference((PetscObject)lG);
391:   MatDestroy(&lGis);
392:   MatDestroy(&G);

394:   if (print) {
395:     PetscObjectSetName((PetscObject)lG,"initial_lG");
396:     MatView(lG,NULL);
397:   }

399:   /* Save lG for values insertion in change of basis */
400:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

402:   /* Analyze the edge-nodes connections (duplicate lG) */
403:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
404:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
405:   PetscBTCreate(nv,&btv);
406:   PetscBTCreate(ne,&bte);
407:   PetscBTCreate(ne,&btb);
408:   PetscBTCreate(ne,&btbd);
409:   PetscBTCreate(nv,&btvcand);
410:   /* need to import the boundary specification to ensure the
411:      proper detection of coarse edges' endpoints */
412:   if (pcbddc->DirichletBoundariesLocal) {
413:     IS is;

415:     if (fl2g) {
416:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
417:     } else {
418:       is = pcbddc->DirichletBoundariesLocal;
419:     }
420:     ISGetLocalSize(is,&cum);
421:     ISGetIndices(is,&idxs);
422:     for (i=0;i<cum;i++) {
423:       if (idxs[i] >= 0) {
424:         PetscBTSet(btb,idxs[i]);
425:         PetscBTSet(btbd,idxs[i]);
426:       }
427:     }
428:     ISRestoreIndices(is,&idxs);
429:     if (fl2g) {
430:       ISDestroy(&is);
431:     }
432:   }
433:   if (pcbddc->NeumannBoundariesLocal) {
434:     IS is;

436:     if (fl2g) {
437:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
438:     } else {
439:       is = pcbddc->NeumannBoundariesLocal;
440:     }
441:     ISGetLocalSize(is,&cum);
442:     ISGetIndices(is,&idxs);
443:     for (i=0;i<cum;i++) {
444:       if (idxs[i] >= 0) {
445:         PetscBTSet(btb,idxs[i]);
446:       }
447:     }
448:     ISRestoreIndices(is,&idxs);
449:     if (fl2g) {
450:       ISDestroy(&is);
451:     }
452:   }

454:   /* Count neighs per dof */
455:   ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
456:   ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);

458:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
459:      for proper detection of coarse edges' endpoints */
460:   PetscBTCreate(ne,&btee);
461:   for (i=0;i<ne;i++) {
462:     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
463:       PetscBTSet(btee,i);
464:     }
465:   }
466:   PetscMalloc1(ne,&marks);
467:   if (!conforming) {
468:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
469:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
470:   }
471:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
472:   MatSeqAIJGetArray(lGe,&vals);
473:   cum  = 0;
474:   for (i=0;i<ne;i++) {
475:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
476:     if (!PetscBTLookup(btee,i)) {
477:       marks[cum++] = i;
478:       continue;
479:     }
480:     /* set badly connected edge dofs as primal */
481:     if (!conforming) {
482:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
483:         marks[cum++] = i;
484:         PetscBTSet(bte,i);
485:         for (j=ii[i];j<ii[i+1];j++) {
486:           PetscBTSet(btv,jj[j]);
487:         }
488:       } else {
489:         /* every edge dofs should be connected trough a certain number of nodal dofs
490:            to other edge dofs belonging to coarse edges
491:            - at most 2 endpoints
492:            - order-1 interior nodal dofs
493:            - no undefined nodal dofs (nconn < order)
494:         */
495:         PetscInt ends = 0,ints = 0, undef = 0;
496:         for (j=ii[i];j<ii[i+1];j++) {
497:           PetscInt v = jj[j],k;
498:           PetscInt nconn = iit[v+1]-iit[v];
499:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
500:           if (nconn > order) ends++;
501:           else if (nconn == order) ints++;
502:           else undef++;
503:         }
504:         if (undef || ends > 2 || ints != order -1) {
505:           marks[cum++] = i;
506:           PetscBTSet(bte,i);
507:           for (j=ii[i];j<ii[i+1];j++) {
508:             PetscBTSet(btv,jj[j]);
509:           }
510:         }
511:       }
512:     }
513:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
514:     if (!order && ii[i+1] != ii[i]) {
515:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
516:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
517:     }
518:   }
519:   PetscBTDestroy(&btee);
520:   MatSeqAIJRestoreArray(lGe,&vals);
521:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
522:   if (!conforming) {
523:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
524:     MatDestroy(&lGt);
525:   }
526:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

528:   /* identify splitpoints and corner candidates */
529:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
530:   if (print) {
531:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
532:     MatView(lGe,NULL);
533:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
534:     MatView(lGt,NULL);
535:   }
536:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
537:   MatSeqAIJGetArray(lGt,&vals);
538:   for (i=0;i<nv;i++) {
539:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
540:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
541:     if (!order) { /* variable order */
542:       PetscReal vorder = 0.;

544:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
545:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
546:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
547:       ord  = 1;
548:     }
549:     if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
550:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
551:       if (PetscBTLookup(btbd,jj[j])) {
552:         bdir = PETSC_TRUE;
553:         break;
554:       }
555:       if (vc != ecount[jj[j]]) {
556:         sneighs = PETSC_FALSE;
557:       } else {
558:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
559:         for (k=0;k<vc;k++) {
560:           if (vn[k] != en[k]) {
561:             sneighs = PETSC_FALSE;
562:             break;
563:           }
564:         }
565:       }
566:     }
567:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
568:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
569:       PetscBTSet(btv,i);
570:     } else if (test == ord) {
571:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
572:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
573:         PetscBTSet(btv,i);
574:       } else {
575:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
576:         PetscBTSet(btvcand,i);
577:       }
578:     }
579:   }
580:   ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
581:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
582:   PetscBTDestroy(&btbd);

584:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
585:   if (order != 1) {
586:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
587:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
588:     for (i=0;i<nv;i++) {
589:       if (PetscBTLookup(btvcand,i)) {
590:         PetscBool found = PETSC_FALSE;
591:         for (j=ii[i];j<ii[i+1] && !found;j++) {
592:           PetscInt k,e = jj[j];
593:           if (PetscBTLookup(bte,e)) continue;
594:           for (k=iit[e];k<iit[e+1];k++) {
595:             PetscInt v = jjt[k];
596:             if (v != i && PetscBTLookup(btvcand,v)) {
597:               found = PETSC_TRUE;
598:               break;
599:             }
600:           }
601:         }
602:         if (!found) {
603:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
604:           PetscBTClear(btvcand,i);
605:         } else {
606:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
607:         }
608:       }
609:     }
610:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
611:   }
612:   MatSeqAIJRestoreArray(lGt,&vals);
613:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
614:   MatDestroy(&lGe);

616:   /* Get the local G^T explicitly */
617:   MatDestroy(&lGt);
618:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
619:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

621:   /* Mark interior nodal dofs */
622:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
623:   PetscBTCreate(nv,&btvi);
624:   for (i=1;i<n_neigh;i++) {
625:     for (j=0;j<n_shared[i];j++) {
626:       PetscBTSet(btvi,shared[i][j]);
627:     }
628:   }
629:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

631:   /* communicate corners and splitpoints */
632:   PetscMalloc1(nv,&vmarks);
633:   PetscArrayzero(sfvleaves,nv);
634:   PetscArrayzero(sfvroots,Lv);
635:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

637:   if (print) {
638:     IS tbz;

640:     cum = 0;
641:     for (i=0;i<nv;i++)
642:       if (sfvleaves[i])
643:         vmarks[cum++] = i;

645:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
646:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
647:     ISView(tbz,NULL);
648:     ISDestroy(&tbz);
649:   }

651:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
652:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
653:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
654:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);

656:   /* Zero rows of lGt corresponding to identified corners
657:      and interior nodal dofs */
658:   cum = 0;
659:   for (i=0;i<nv;i++) {
660:     if (sfvleaves[i]) {
661:       vmarks[cum++] = i;
662:       PetscBTSet(btv,i);
663:     }
664:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
665:   }
666:   PetscBTDestroy(&btvi);
667:   if (print) {
668:     IS tbz;

670:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
671:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
672:     ISView(tbz,NULL);
673:     ISDestroy(&tbz);
674:   }
675:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
676:   PetscFree(vmarks);
677:   PetscSFDestroy(&sfv);
678:   PetscFree2(sfvleaves,sfvroots);

680:   /* Recompute G */
681:   MatDestroy(&lG);
682:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
683:   if (print) {
684:     PetscObjectSetName((PetscObject)lG,"used_lG");
685:     MatView(lG,NULL);
686:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
687:     MatView(lGt,NULL);
688:   }

690:   /* Get primal dofs (if any) */
691:   cum = 0;
692:   for (i=0;i<ne;i++) {
693:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
694:   }
695:   if (fl2g) {
696:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
697:   }
698:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
699:   if (print) {
700:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
701:     ISView(primals,NULL);
702:   }
703:   PetscBTDestroy(&bte);
704:   /* TODO: what if the user passed in some of them ?  */
705:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
706:   ISDestroy(&primals);

708:   /* Compute edge connectivity */
709:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");

711:   /* Symbolic conn = lG*lGt */
712:   MatProductCreate(lG,lGt,NULL,&conn);
713:   MatProductSetType(conn,MATPRODUCT_AB);
714:   MatProductSetAlgorithm(conn,"default");
715:   MatProductSetFill(conn,PETSC_DEFAULT);
716:   PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
717:   MatProductSetFromOptions(conn);
718:   MatProductSymbolic(conn);

720:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
721:   if (fl2g) {
722:     PetscBT   btf;
723:     PetscInt  *iia,*jja,*iiu,*jju;
724:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

726:     /* create CSR for all local dofs */
727:     PetscMalloc1(n+1,&iia);
728:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
729:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
730:       iiu = pcbddc->mat_graph->xadj;
731:       jju = pcbddc->mat_graph->adjncy;
732:     } else if (pcbddc->use_local_adj) {
733:       rest = PETSC_TRUE;
734:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
735:     } else {
736:       free   = PETSC_TRUE;
737:       PetscMalloc2(n+1,&iiu,n,&jju);
738:       iiu[0] = 0;
739:       for (i=0;i<n;i++) {
740:         iiu[i+1] = i+1;
741:         jju[i]   = -1;
742:       }
743:     }

745:     /* import sizes of CSR */
746:     iia[0] = 0;
747:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

749:     /* overwrite entries corresponding to the Nedelec field */
750:     PetscBTCreate(n,&btf);
751:     ISGetIndices(nedfieldlocal,&idxs);
752:     for (i=0;i<ne;i++) {
753:       PetscBTSet(btf,idxs[i]);
754:       iia[idxs[i]+1] = ii[i+1]-ii[i];
755:     }

757:     /* iia in CSR */
758:     for (i=0;i<n;i++) iia[i+1] += iia[i];

760:     /* jja in CSR */
761:     PetscMalloc1(iia[n],&jja);
762:     for (i=0;i<n;i++)
763:       if (!PetscBTLookup(btf,i))
764:         for (j=0;j<iiu[i+1]-iiu[i];j++)
765:           jja[iia[i]+j] = jju[iiu[i]+j];

767:     /* map edge dofs connectivity */
768:     if (jj) {
769:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
770:       for (i=0;i<ne;i++) {
771:         PetscInt e = idxs[i];
772:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
773:       }
774:     }
775:     ISRestoreIndices(nedfieldlocal,&idxs);
776:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
777:     if (rest) {
778:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
779:     }
780:     if (free) {
781:       PetscFree2(iiu,jju);
782:     }
783:     PetscBTDestroy(&btf);
784:   } else {
785:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
786:   }

788:   /* Analyze interface for edge dofs */
789:   PCBDDCAnalyzeInterface(pc);
790:   pcbddc->mat_graph->twodim = PETSC_FALSE;

792:   /* Get coarse edges in the edge space */
793:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
794:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

796:   if (fl2g) {
797:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
798:     PetscMalloc1(nee,&eedges);
799:     for (i=0;i<nee;i++) {
800:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
801:     }
802:   } else {
803:     eedges  = alleedges;
804:     primals = allprimals;
805:   }

807:   /* Mark fine edge dofs with their coarse edge id */
808:   PetscArrayzero(marks,ne);
809:   ISGetLocalSize(primals,&cum);
810:   ISGetIndices(primals,&idxs);
811:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
812:   ISRestoreIndices(primals,&idxs);
813:   if (print) {
814:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
815:     ISView(primals,NULL);
816:   }

818:   maxsize = 0;
819:   for (i=0;i<nee;i++) {
820:     PetscInt size,mark = i+1;

822:     ISGetLocalSize(eedges[i],&size);
823:     ISGetIndices(eedges[i],&idxs);
824:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
825:     ISRestoreIndices(eedges[i],&idxs);
826:     maxsize = PetscMax(maxsize,size);
827:   }

829:   /* Find coarse edge endpoints */
830:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
831:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
832:   for (i=0;i<nee;i++) {
833:     PetscInt mark = i+1,size;

835:     ISGetLocalSize(eedges[i],&size);
836:     if (!size && nedfieldlocal) continue;
837:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
838:     ISGetIndices(eedges[i],&idxs);
839:     if (print) {
840:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
841:       ISView(eedges[i],NULL);
842:     }
843:     for (j=0;j<size;j++) {
844:       PetscInt k, ee = idxs[j];
845:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
846:       for (k=ii[ee];k<ii[ee+1];k++) {
847:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
848:         if (PetscBTLookup(btv,jj[k])) {
849:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
850:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
851:           PetscInt  k2;
852:           PetscBool corner = PETSC_FALSE;
853:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
854:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
855:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
856:                if the edge dof lie on the natural part of the boundary */
857:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
858:               corner = PETSC_TRUE;
859:               break;
860:             }
861:           }
862:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
863:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
864:             PetscBTSet(btv,jj[k]);
865:           } else {
866:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
867:           }
868:         }
869:       }
870:     }
871:     ISRestoreIndices(eedges[i],&idxs);
872:   }
873:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
874:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
875:   PetscBTDestroy(&btb);

877:   /* Reset marked primal dofs */
878:   ISGetLocalSize(primals,&cum);
879:   ISGetIndices(primals,&idxs);
880:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
881:   ISRestoreIndices(primals,&idxs);

883:   /* Now use the initial lG */
884:   MatDestroy(&lG);
885:   MatDestroy(&lGt);
886:   lG   = lGinit;
887:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

889:   /* Compute extended cols indices */
890:   PetscBTCreate(nv,&btvc);
891:   PetscBTCreate(nee,&bter);
892:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
893:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
894:   i   *= maxsize;
895:   PetscCalloc1(nee,&extcols);
896:   PetscMalloc2(i,&extrow,i,&gidxs);
897:   eerr = PETSC_FALSE;
898:   for (i=0;i<nee;i++) {
899:     PetscInt size,found = 0;

901:     cum  = 0;
902:     ISGetLocalSize(eedges[i],&size);
903:     if (!size && nedfieldlocal) continue;
904:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
905:     ISGetIndices(eedges[i],&idxs);
906:     PetscBTMemzero(nv,btvc);
907:     for (j=0;j<size;j++) {
908:       PetscInt k,ee = idxs[j];
909:       for (k=ii[ee];k<ii[ee+1];k++) {
910:         PetscInt vv = jj[k];
911:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
912:         else if (!PetscBTLookupSet(btvc,vv)) found++;
913:       }
914:     }
915:     ISRestoreIndices(eedges[i],&idxs);
916:     PetscSortRemoveDupsInt(&cum,extrow);
917:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
918:     PetscSortIntWithArray(cum,gidxs,extrow);
919:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
920:     /* it may happen that endpoints are not defined at this point
921:        if it is the case, mark this edge for a second pass */
922:     if (cum != size -1 || found != 2) {
923:       PetscBTSet(bter,i);
924:       if (print) {
925:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
926:         ISView(eedges[i],NULL);
927:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
928:         ISView(extcols[i],NULL);
929:       }
930:       eerr = PETSC_TRUE;
931:     }
932:   }
933:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
934:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
935:   if (done) {
936:     PetscInt *newprimals;

938:     PetscMalloc1(ne,&newprimals);
939:     ISGetLocalSize(primals,&cum);
940:     ISGetIndices(primals,&idxs);
941:     PetscArraycpy(newprimals,idxs,cum);
942:     ISRestoreIndices(primals,&idxs);
943:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
944:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
945:     for (i=0;i<nee;i++) {
946:       PetscBool has_candidates = PETSC_FALSE;
947:       if (PetscBTLookup(bter,i)) {
948:         PetscInt size,mark = i+1;

950:         ISGetLocalSize(eedges[i],&size);
951:         ISGetIndices(eedges[i],&idxs);
952:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
953:         for (j=0;j<size;j++) {
954:           PetscInt k,ee = idxs[j];
955:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
956:           for (k=ii[ee];k<ii[ee+1];k++) {
957:             /* set all candidates located on the edge as corners */
958:             if (PetscBTLookup(btvcand,jj[k])) {
959:               PetscInt k2,vv = jj[k];
960:               has_candidates = PETSC_TRUE;
961:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
962:               PetscBTSet(btv,vv);
963:               /* set all edge dofs connected to candidate as primals */
964:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
965:                 if (marks[jjt[k2]] == mark) {
966:                   PetscInt k3,ee2 = jjt[k2];
967:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
968:                   newprimals[cum++] = ee2;
969:                   /* finally set the new corners */
970:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
971:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
972:                     PetscBTSet(btv,jj[k3]);
973:                   }
974:                 }
975:               }
976:             } else {
977:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
978:             }
979:           }
980:         }
981:         if (!has_candidates) { /* circular edge */
982:           PetscInt k, ee = idxs[0],*tmarks;

984:           PetscCalloc1(ne,&tmarks);
985:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
986:           for (k=ii[ee];k<ii[ee+1];k++) {
987:             PetscInt k2;
988:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
989:             PetscBTSet(btv,jj[k]);
990:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
991:           }
992:           for (j=0;j<size;j++) {
993:             if (tmarks[idxs[j]] > 1) {
994:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
995:               newprimals[cum++] = idxs[j];
996:             }
997:           }
998:           PetscFree(tmarks);
999:         }
1000:         ISRestoreIndices(eedges[i],&idxs);
1001:       }
1002:       ISDestroy(&extcols[i]);
1003:     }
1004:     PetscFree(extcols);
1005:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1006:     PetscSortRemoveDupsInt(&cum,newprimals);
1007:     if (fl2g) {
1008:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1009:       ISDestroy(&primals);
1010:       for (i=0;i<nee;i++) {
1011:         ISDestroy(&eedges[i]);
1012:       }
1013:       PetscFree(eedges);
1014:     }
1015:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1016:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1017:     PetscFree(newprimals);
1018:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1019:     ISDestroy(&primals);
1020:     PCBDDCAnalyzeInterface(pc);
1021:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1022:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1023:     if (fl2g) {
1024:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1025:       PetscMalloc1(nee,&eedges);
1026:       for (i=0;i<nee;i++) {
1027:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1028:       }
1029:     } else {
1030:       eedges  = alleedges;
1031:       primals = allprimals;
1032:     }
1033:     PetscCalloc1(nee,&extcols);

1035:     /* Mark again */
1036:     PetscArrayzero(marks,ne);
1037:     for (i=0;i<nee;i++) {
1038:       PetscInt size,mark = i+1;

1040:       ISGetLocalSize(eedges[i],&size);
1041:       ISGetIndices(eedges[i],&idxs);
1042:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1043:       ISRestoreIndices(eedges[i],&idxs);
1044:     }
1045:     if (print) {
1046:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1047:       ISView(primals,NULL);
1048:     }

1050:     /* Recompute extended cols */
1051:     eerr = PETSC_FALSE;
1052:     for (i=0;i<nee;i++) {
1053:       PetscInt size;

1055:       cum  = 0;
1056:       ISGetLocalSize(eedges[i],&size);
1057:       if (!size && nedfieldlocal) continue;
1058:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1059:       ISGetIndices(eedges[i],&idxs);
1060:       for (j=0;j<size;j++) {
1061:         PetscInt k,ee = idxs[j];
1062:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1063:       }
1064:       ISRestoreIndices(eedges[i],&idxs);
1065:       PetscSortRemoveDupsInt(&cum,extrow);
1066:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1067:       PetscSortIntWithArray(cum,gidxs,extrow);
1068:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1069:       if (cum != size -1) {
1070:         if (print) {
1071:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1072:           ISView(eedges[i],NULL);
1073:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1074:           ISView(extcols[i],NULL);
1075:         }
1076:         eerr = PETSC_TRUE;
1077:       }
1078:     }
1079:   }
1080:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1081:   PetscFree2(extrow,gidxs);
1082:   PetscBTDestroy(&bter);
1083:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1084:   /* an error should not occur at this point */
1085:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1087:   /* Check the number of endpoints */
1088:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1089:   PetscMalloc1(2*nee,&corners);
1090:   PetscMalloc1(nee,&cedges);
1091:   for (i=0;i<nee;i++) {
1092:     PetscInt size, found = 0, gc[2];

1094:     /* init with defaults */
1095:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1096:     ISGetLocalSize(eedges[i],&size);
1097:     if (!size && nedfieldlocal) continue;
1098:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1099:     ISGetIndices(eedges[i],&idxs);
1100:     PetscBTMemzero(nv,btvc);
1101:     for (j=0;j<size;j++) {
1102:       PetscInt k,ee = idxs[j];
1103:       for (k=ii[ee];k<ii[ee+1];k++) {
1104:         PetscInt vv = jj[k];
1105:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1106:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1107:           corners[i*2+found++] = vv;
1108:         }
1109:       }
1110:     }
1111:     if (found != 2) {
1112:       PetscInt e;
1113:       if (fl2g) {
1114:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1115:       } else {
1116:         e = idxs[0];
1117:       }
1118:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1119:     }

1121:     /* get primal dof index on this coarse edge */
1122:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1123:     if (gc[0] > gc[1]) {
1124:       PetscInt swap  = corners[2*i];
1125:       corners[2*i]   = corners[2*i+1];
1126:       corners[2*i+1] = swap;
1127:     }
1128:     cedges[i] = idxs[size-1];
1129:     ISRestoreIndices(eedges[i],&idxs);
1130:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1131:   }
1132:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1133:   PetscBTDestroy(&btvc);

1135:   if (PetscDefined(USE_DEBUG)) {
1136:     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1137:      not interfere with neighbouring coarse edges */
1138:     PetscMalloc1(nee+1,&emarks);
1139:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140:     for (i=0;i<nv;i++) {
1141:       PetscInt emax = 0,eemax = 0;

1143:       if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1144:       PetscArrayzero(emarks,nee+1);
1145:       for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1146:       for (j=1;j<nee+1;j++) {
1147:         if (emax < emarks[j]) {
1148:           emax = emarks[j];
1149:           eemax = j;
1150:         }
1151:       }
1152:       /* not relevant for edges */
1153:       if (!eemax) continue;

1155:       for (j=ii[i];j<ii[i+1];j++) {
1156:         if (marks[jj[j]] && marks[jj[j]] != eemax) {
1157:           SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1158:         }
1159:       }
1160:     }
1161:     PetscFree(emarks);
1162:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1163:   }

1165:   /* Compute extended rows indices for edge blocks of the change of basis */
1166:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1167:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1168:   extmem *= maxsize;
1169:   PetscMalloc1(extmem*nee,&extrow);
1170:   PetscMalloc1(nee,&extrows);
1171:   PetscCalloc1(nee,&extrowcum);
1172:   for (i=0;i<nv;i++) {
1173:     PetscInt mark = 0,size,start;

1175:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1176:     for (j=ii[i];j<ii[i+1];j++)
1177:       if (marks[jj[j]] && !mark)
1178:         mark = marks[jj[j]];

1180:     /* not relevant */
1181:     if (!mark) continue;

1183:     /* import extended row */
1184:     mark--;
1185:     start = mark*extmem+extrowcum[mark];
1186:     size = ii[i+1]-ii[i];
1187:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1188:     PetscArraycpy(extrow+start,jj+ii[i],size);
1189:     extrowcum[mark] += size;
1190:   }
1191:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1192:   MatDestroy(&lGt);
1193:   PetscFree(marks);

1195:   /* Compress extrows */
1196:   cum  = 0;
1197:   for (i=0;i<nee;i++) {
1198:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1199:     PetscSortRemoveDupsInt(&size,start);
1200:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1201:     cum  = PetscMax(cum,size);
1202:   }
1203:   PetscFree(extrowcum);
1204:   PetscBTDestroy(&btv);
1205:   PetscBTDestroy(&btvcand);

1207:   /* Workspace for lapack inner calls and VecSetValues */
1208:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1210:   /* Create change of basis matrix (preallocation can be improved) */
1211:   MatCreate(comm,&T);
1212:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1213:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1214:   MatSetType(T,MATAIJ);
1215:   MatSeqAIJSetPreallocation(T,10,NULL);
1216:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1217:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1218:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1219:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1220:   ISLocalToGlobalMappingDestroy(&al2g);

1222:   /* Defaults to identity */
1223:   MatCreateVecs(pc->pmat,&tvec,NULL);
1224:   VecSet(tvec,1.0);
1225:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1226:   VecDestroy(&tvec);

1228:   /* Create discrete gradient for the coarser level if needed */
1229:   MatDestroy(&pcbddc->nedcG);
1230:   ISDestroy(&pcbddc->nedclocal);
1231:   if (pcbddc->current_level < pcbddc->max_levels) {
1232:     ISLocalToGlobalMapping cel2g,cvl2g;
1233:     IS                     wis,gwis;
1234:     PetscInt               cnv,cne;

1236:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1237:     if (fl2g) {
1238:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1239:     } else {
1240:       PetscObjectReference((PetscObject)wis);
1241:       pcbddc->nedclocal = wis;
1242:     }
1243:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1244:     ISDestroy(&wis);
1245:     ISRenumber(gwis,NULL,&cne,&wis);
1246:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1247:     ISDestroy(&wis);
1248:     ISDestroy(&gwis);

1250:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1251:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1252:     ISDestroy(&wis);
1253:     ISRenumber(gwis,NULL,&cnv,&wis);
1254:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1255:     ISDestroy(&wis);
1256:     ISDestroy(&gwis);

1258:     MatCreate(comm,&pcbddc->nedcG);
1259:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1260:     MatSetType(pcbddc->nedcG,MATAIJ);
1261:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1262:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1263:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1264:     ISLocalToGlobalMappingDestroy(&cel2g);
1265:     ISLocalToGlobalMappingDestroy(&cvl2g);
1266:   }
1267:   ISLocalToGlobalMappingDestroy(&vl2g);

1269: #if defined(PRINT_GDET)
1270:   inc = 0;
1271:   lev = pcbddc->current_level;
1272: #endif

1274:   /* Insert values in the change of basis matrix */
1275:   for (i=0;i<nee;i++) {
1276:     Mat         Gins = NULL, GKins = NULL;
1277:     IS          cornersis = NULL;
1278:     PetscScalar cvals[2];

1280:     if (pcbddc->nedcG) {
1281:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1282:     }
1283:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1284:     if (Gins && GKins) {
1285:       const PetscScalar *data;
1286:       const PetscInt    *rows,*cols;
1287:       PetscInt          nrh,nch,nrc,ncc;

1289:       ISGetIndices(eedges[i],&cols);
1290:       /* H1 */
1291:       ISGetIndices(extrows[i],&rows);
1292:       MatGetSize(Gins,&nrh,&nch);
1293:       MatDenseGetArrayRead(Gins,&data);
1294:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1295:       MatDenseRestoreArrayRead(Gins,&data);
1296:       ISRestoreIndices(extrows[i],&rows);
1297:       /* complement */
1298:       MatGetSize(GKins,&nrc,&ncc);
1299:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1300:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1301:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1302:       MatDenseGetArrayRead(GKins,&data);
1303:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1304:       MatDenseRestoreArrayRead(GKins,&data);

1306:       /* coarse discrete gradient */
1307:       if (pcbddc->nedcG) {
1308:         PetscInt cols[2];

1310:         cols[0] = 2*i;
1311:         cols[1] = 2*i+1;
1312:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1313:       }
1314:       ISRestoreIndices(eedges[i],&cols);
1315:     }
1316:     ISDestroy(&extrows[i]);
1317:     ISDestroy(&extcols[i]);
1318:     ISDestroy(&cornersis);
1319:     MatDestroy(&Gins);
1320:     MatDestroy(&GKins);
1321:   }
1322:   ISLocalToGlobalMappingDestroy(&el2g);

1324:   /* Start assembling */
1325:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1326:   if (pcbddc->nedcG) {
1327:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1328:   }

1330:   /* Free */
1331:   if (fl2g) {
1332:     ISDestroy(&primals);
1333:     for (i=0;i<nee;i++) {
1334:       ISDestroy(&eedges[i]);
1335:     }
1336:     PetscFree(eedges);
1337:   }

1339:   /* hack mat_graph with primal dofs on the coarse edges */
1340:   {
1341:     PCBDDCGraph graph   = pcbddc->mat_graph;
1342:     PetscInt    *oqueue = graph->queue;
1343:     PetscInt    *ocptr  = graph->cptr;
1344:     PetscInt    ncc,*idxs;

1346:     /* find first primal edge */
1347:     if (pcbddc->nedclocal) {
1348:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1349:     } else {
1350:       if (fl2g) {
1351:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1352:       }
1353:       idxs = cedges;
1354:     }
1355:     cum = 0;
1356:     while (cum < nee && cedges[cum] < 0) cum++;

1358:     /* adapt connected components */
1359:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1360:     graph->cptr[0] = 0;
1361:     for (i=0,ncc=0;i<graph->ncc;i++) {
1362:       PetscInt lc = ocptr[i+1]-ocptr[i];
1363:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1364:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1365:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1366:         ncc++;
1367:         lc--;
1368:         cum++;
1369:         while (cum < nee && cedges[cum] < 0) cum++;
1370:       }
1371:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1372:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1373:       ncc++;
1374:     }
1375:     graph->ncc = ncc;
1376:     if (pcbddc->nedclocal) {
1377:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1378:     }
1379:     PetscFree2(ocptr,oqueue);
1380:   }
1381:   ISLocalToGlobalMappingDestroy(&fl2g);
1382:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1383:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1384:   MatDestroy(&conn);

1386:   ISDestroy(&nedfieldlocal);
1387:   PetscFree(extrow);
1388:   PetscFree2(work,rwork);
1389:   PetscFree(corners);
1390:   PetscFree(cedges);
1391:   PetscFree(extrows);
1392:   PetscFree(extcols);
1393:   MatDestroy(&lG);

1395:   /* Complete assembling */
1396:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1397:   if (pcbddc->nedcG) {
1398:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1399: #if 0
1400:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1401:     MatView(pcbddc->nedcG,NULL);
1402: #endif
1403:   }

1405:   /* set change of basis */
1406:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1407:   MatDestroy(&T);

1409:   return(0);
1410: }

1412: /* the near-null space of BDDC carries information on quadrature weights,
1413:    and these can be collinear -> so cheat with MatNullSpaceCreate
1414:    and create a suitable set of basis vectors first */
/* PCBDDCNullSpaceCreate - calls MatNullSpaceCreate on quad_vecs after temporarily
   writing placeholder entries into them, then zeroes those entries again.

   Input:
     comm      - communicator forwarded to MatNullSpaceCreate
     has_const - forwarded to MatNullSpaceCreate; also selects the placeholder
                 pattern (a single entry when false, a pair of entries when true)
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors handed to MatNullSpaceCreate; the entries written here
                 are set to 0 before returning
   Output:
     nnsp      - forwarded to MatNullSpaceCreate, which fills it

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is true and the
   ownership range of a vector on this process holds fewer than 2*nvecs entries. */
1415: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1416: {
1418:   PetscInt       i;

        /* first pass: write the placeholder entries of vector i on the process
           whose ownership range [first,last) contains the index i */
1421:   for (i=0;i<nvecs;i++) {
1422:     PetscInt first,last;

1424:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
          /* the paired pattern below addresses local offsets up to 2*i-first+1 */
1425:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1426:     if (i>=first && i < last) {
1427:       PetscScalar *data;
1428:       VecGetArray(quad_vecs[i],&data);
1429:       if (!has_const) {
              /* single entry 1 at local offset i-first */
1430:         data[i-first] = 1.;
1431:       } else {
              /* pair +1/sqrt(2), -1/sqrt(2): the two written values sum to zero */
1432:         data[2*i-first] = 1./PetscSqrtReal(2.);
1433:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1434:       }
1435:       VecRestoreArray(quad_vecs[i],&data);
1436:     }
          /* executed for every vector, whether or not this process wrote into it */
1437:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1438:   }
1439:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1440:   for (i=0;i<nvecs;i++) { /* reset vectors */
1441:     PetscInt first,last;
          /* NOTE(review): the pop here and the push at the end of the iteration suggest
             MatNullSpaceCreate leaves the vectors read-locked - confirm against its documentation */
1442:     VecLockReadPop(quad_vecs[i]);
1443:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1444:     if (i>=first && i < last) {
1445:       PetscScalar *data;
1446:       VecGetArray(quad_vecs[i],&data);
            /* zero exactly the offsets written in the first pass */
1447:       if (!has_const) {
1448:         data[i-first] = 0.;
1449:       } else {
1450:         data[2*i-first] = 0.;
1451:         data[2*i-first+1] = 0.;
1452:       }
1453:       VecRestoreArray(quad_vecs[i],&data);
1454:     }
1455:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1456:     VecLockReadPush(quad_vecs[i]);
1457:   }
1458:   return(0);
1459: }

/* PCBDDCComputeNoNetFlux - fills a set of "quadrature" vectors from the local part of
   divudotp and returns them wrapped in a MatNullSpace.

   Input:
     A         - matrix used to create the global vectors, to pick the local-to-global
                 mapping and to provide the communicator
     divudotp  - matrix whose local matrix (MatISGetLocalMat) is applied to a vector of ones
     transpose - if false the transpose of the local matrix is applied and the first mapping
                 returned by MatGetLocalToGlobalMapping(A,..) is used; if true the local
                 matrix itself is applied and the second mapping is used
     vl2l      - optional index set; when non-NULL the local result is scattered into a
                 vector created from the local matrix of A
     graph     - supplies l2gmap, count and neighbours_set
   Output:
     nnsp      - the null space created by PCBDDCNullSpaceCreate, or NULL when the
                 global maximum computed below is zero */
1461: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1462: {
1463:   Mat                    loc_divudotp;
1464:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1465:   ISLocalToGlobalMapping map;
1466:   PetscScalar            *vals;
1467:   const PetscScalar      *array;
1468:   PetscInt               i,maxneighs = 0,maxsize,*gidxs;
1469:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1470:   PetscMPIInt            rank;
1471:   PetscErrorCode         ierr;

        /* number of vectors needed: max over the neighbor entries of count[first shared dof]+1,
           then maximized over the communicator of A */
1474:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1475:   for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1476:   MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
        /* nothing to build: release the neighbor info and report an empty result */
1477:   if (!maxneighs) {
1478:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1479:     *nnsp = NULL;
1480:     return(0);
1481:   }
        /* work arrays sized for the largest shared set */
1482:   maxsize = 0;
1483:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1484:   PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1485:   /* create vectors to hold quadrature weights */
1486:   MatCreateVecs(A,&quad_vec,NULL);
1487:   if (!transpose) {
1488:     MatGetLocalToGlobalMapping(A,&map,NULL);
1489:   } else {
1490:     MatGetLocalToGlobalMapping(A,NULL,&map);
1491:   }
1492:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1493:   VecDestroy(&quad_vec);
        /* the null space is created up front (has_const = PETSC_FALSE); the vectors are
           filled afterwards, hence the read locks are popped here and pushed back at the end */
1494:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1495:   for (i=0;i<maxneighs;i++) {
1496:     VecLockReadPop(quad_vecs[i]);
1497:   }

1499:   /* compute local quad vec */
1500:   MatISGetLocalMat(divudotp,&loc_divudotp);
1501:   if (!transpose) {
1502:     MatCreateVecs(loc_divudotp,&v,&p);
1503:   } else {
1504:     MatCreateVecs(loc_divudotp,&p,&v);
1505:   }
        /* v = loc_divudotp^T * ones (or loc_divudotp * ones when transpose is set) */
1506:   VecSet(p,1.);
1507:   if (!transpose) {
1508:     MatMultTranspose(loc_divudotp,p,v);
1509:   } else {
1510:     MatMult(loc_divudotp,p,v);
1511:   }
        /* optionally move v into a vector created from the local matrix of A, using vl2l
           as the destination index set; otherwise vins simply aliases v */
1512:   if (vl2l) {
1513:     Mat        lA;
1514:     VecScatter sc;

1516:     MatISGetLocalMat(A,&lA);
1517:     MatCreateVecs(lA,&vins,NULL);
1518:     VecScatterCreate(v,NULL,vins,vl2l,&sc);
1519:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1520:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1521:     VecScatterDestroy(&sc);
1522:   } else {
1523:     vins = v;
1524:   }
1525:   VecGetArrayRead(vins,&array);
1526:   VecDestroy(&p);

1528:   /* insert in global quadrature vecs */
1529:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
        /* the loop starts at 1, so entry 0 of the neighbor info is skipped
           (NOTE(review): presumably entry 0 describes this process itself - confirm) */
1530:   for (i=1;i<n_neigh;i++) {
1531:     const PetscInt    *idxs;
1532:     PetscInt          idx,nn,j;

1534:     idxs = shared[i];
1535:     nn   = n_shared[i];
          /* gather the local values on the dofs shared with this neighbor */
1536:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* pick the destination vector from the position of this rank relative to
             neighbours_set of the first shared dof.
             NOTE(review): idx = -(idx+1) presumes PetscFindInt did not find rank and returned a
             negative-encoded slot - confirm against the PetscFindInt documentation; any
             nonnegative return value becomes negative here and trips the check below */
1537:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1538:     idx  = -(idx+1);
1539:     if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs);
          /* translate the shared local indices through map and insert the values */
1540:     ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1541:     VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1542:   }
1543:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1544:   VecRestoreArrayRead(vins,&array);
        /* vins is a separate object only when vl2l was given */
1545:   if (vl2l) {
1546:     VecDestroy(&vins);
1547:   }
1548:   VecDestroy(&v);
1549:   PetscFree2(gidxs,vals);

1551:   /* assemble near null space */
1552:   for (i=0;i<maxneighs;i++) {
1553:     VecAssemblyBegin(quad_vecs[i]);
1554:   }
1555:   for (i=0;i<maxneighs;i++) {
1556:     VecAssemblyEnd(quad_vecs[i]);
1557:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
          /* matches the VecLockReadPop issued right after PCBDDCNullSpaceCreate */
1558:     VecLockReadPush(quad_vecs[i]);
1559:   }
        /* NOTE(review): presumably *nnsp holds its own references to the vectors, so only
           the local handles are released here - confirm */
1560:   VecDestroyVecs(maxneighs,&quad_vecs);
1561:   return(0);
1562: }

/* PCBDDCAddPrimalVerticesLocalIS - adds the indices in primalv to the local primal
   vertices stored in the BDDC data of pc.

   Input:
     pc      - preconditioner whose data pointer is read as PC_BDDC
     primalv - index set to add; a NULL value makes the call a no-op

   When pcbddc->user_primal_vertices_local is already set, it is replaced by the
   sorted, duplicate-free concatenation of primalv and the previous set; otherwise
   primalv is forwarded to PCBDDCSetPrimalVerticesLocalIS. */
1564: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1565: {
1566:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1570:   if (primalv) {
1571:     if (pcbddc->user_primal_vertices_local) {
1572:       IS list[2], newp;

            /* merge the new indices with the ones already stored */
1574:       list[0] = primalv;
1575:       list[1] = pcbddc->user_primal_vertices_local;
1576:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1577:       ISSortRemoveDups(newp);
            /* list[1] aliases the previously stored IS: destroy it through the alias,
               then overwrite the field with the merged set */
1578:       ISDestroy(&list[1]);
1579:       pcbddc->user_primal_vertices_local = newp;
1580:     } else {
            /* nothing stored yet: delegate to the setter */
1581:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1582:     }
1583:   }
1584:   return(0);
1585: }

/* func_coords_private - writes one coordinate of the point X into every output component.

   Input:
     dim - not used in the visible body
     t   - not used in the visible body
     X   - coordinates of the point; only X[*comp] is read
     Nf  - number of entries of out to fill
     ctx - pointer to a PetscInt holding the coordinate index to extract
   Output:
     out - out[0..Nf-1] are all set to X[*comp] */
1587: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1588: {
        /* ctx is reinterpreted as the address of the coordinate index */
1589:   PetscInt f, *comp  = (PetscInt *)ctx;

1592:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1593:   return(0);
1594: }

/* Gathers the local topology information stored in the PC_BDDC data of pc.
   In order, the routine:
     1. reads the -pc_bddc_monolithic option;
     2. unless monolithic, builds pcbddc->ISForDofsLocal (from user global ISes, from the
        DM field decomposition, from a "_convert_nest_lfields" container attached to the
        matrix, or from the matrix block size), or consistency-checks already present
        local ISes;
     3. converts Dirichlet/Neumann boundaries and user primal vertices given in global
        ordering to local ordering, or consistency-checks the local ones;
     4. optionally detects disconnected components of the local problem;
     5. for a DMDA, copies node coordinates into pcbddc->mat_graph and adds the subdomain
        corners as primal vertices;
     6. if corner selection is active and the graph has no coordinates (cdim == 0),
        computes coordinates through DMProjectFunction and passes them to PCSetCoordinates.
   Returns 0. */
1596: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1597: {
1599:   Vec            local,global;
1600:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1601:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1602:   PetscBool      monolithic = PETSC_FALSE;

1605:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1606:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1607:   PetscOptionsEnd();
1608:   /* need to convert from global to local topology information and remove references to information in global ordering */
        /* work vectors (one with the layout of pmat, one with the layout of the local matrix) passed to PCBDDCGlobalToLocal */
1609:   MatCreateVecs(pc->pmat,&global,NULL);
1610:   MatCreateVecs(matis->A,&local,NULL);
1611:   VecBindToCPU(global,PETSC_TRUE);
1612:   VecBindToCPU(local,PETSC_TRUE);
1613:   if (monolithic) { /* just get block size to properly compute vertices */
          /* vertex_size is only overwritten when it still has the value 1 */
1614:     if (pcbddc->vertex_size == 1) {
1615:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1616:     }
          /* skip every dof-splitting branch below */
1617:     goto boundary;
1618:   }

1620:   if (pcbddc->user_provided_isfordofs) {
1621:     if (pcbddc->n_ISForDofs) {
1622:       PetscInt i;

            /* convert each user IS to local ordering, carry over its block size, then drop the global IS */
1624:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1625:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1626:         PetscInt bs;

1628:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1629:         ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1630:         ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1631:         ISDestroy(&pcbddc->ISForDofs[i]);
1632:       }
1633:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1634:       pcbddc->n_ISForDofs = 0;
1635:       PetscFree(pcbddc->ISForDofs);
1636:     }
1637:   } else {
1638:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1639:       DM dm;

            /* look for a DM on the matrix first, then on the PC */
1641:       MatGetDM(pc->pmat, &dm);
1642:       if (!dm) {
1643:         PCGetDM(pc, &dm);
1644:       }
1645:       if (dm) {
1646:         IS      *fields;
1647:         PetscInt nf,i;

              /* one local IS per field returned by the DM; the field ISes are destroyed after conversion */
1649:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1650:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1651:         for (i=0;i<nf;i++) {
1652:           PetscInt bs;

1654:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1655:           ISGetBlockSize(fields[i],&bs);
1656:           ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1657:           ISDestroy(&fields[i]);
1658:         }
1659:         PetscFree(fields);
1660:         pcbddc->n_ISForDofsLocal = nf;
1661:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1662:         PetscContainer   c;

1664:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1665:         if (c) {
1666:           MatISLocalFields lf;
1667:           PetscContainerGetPointer(c,(void**)&lf);
1668:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1669:         } else { /* fallback, create the default fields if bs > 1 */
                /* i first holds the block size of pmat, then is reused as the loop index */
1670:           PetscInt i, n = matis->A->rmap->n;
1671:           MatGetBlockSize(pc->pmat,&i);
1672:           if (i > 1) {
1673:             pcbddc->n_ISForDofsLocal = i;
1674:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
                  /* field i = strided local indices i, i+bs, i+2*bs, ... (n/bs entries) */
1675:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1676:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1677:             }
1678:           }
1679:         }
1680:       }
1681:     } else {
            /* local fields already present: each one is replaced by its MPI_LAND-consistent version */
1682:       PetscInt i;
1683:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1684:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1685:       }
1686:     }
1687:   }

1689: boundary:
        /* Dirichlet: convert the global IS if no local one exists, otherwise check the local one with MPI_LAND */
1690:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1691:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1692:   } else if (pcbddc->DirichletBoundariesLocal) {
1693:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1694:   }
        /* Neumann: same scheme, but the local IS is checked with MPI_LOR */
1695:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1696:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1697:   } else if (pcbddc->NeumannBoundariesLocal) {
1698:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1699:   }
        /* user primal vertices: only converted, no consistency check is run on an existing local IS */
1700:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1701:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1702:   }
1703:   VecDestroy(&global);
1704:   VecDestroy(&local);
1705:   /* detect local disconnected subdomains if requested (use matis->A) */
1706:   if (pcbddc->detect_disconnected) {
1707:     IS        primalv = NULL;
1708:     PetscInt  i;
1709:     PetscBool filter = pcbddc->detect_disconnected_filter;

          /* release the previously stored components before recomputing them */
1711:     for (i=0;i<pcbddc->n_local_subs;i++) {
1712:       ISDestroy(&pcbddc->local_subs[i]);
1713:     }
1714:     PetscFree(pcbddc->local_subs);
1715:     PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1716:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1717:     ISDestroy(&primalv);
1718:   }
1719:   /* early stage corner detection */
1720:   {
1721:     DM dm;

1723:     MatGetDM(pc->pmat,&dm);
1724:     if (!dm) {
1725:       PCGetDM(pc,&dm);
1726:     }
1727:     if (dm) {
1728:       PetscBool isda;

1730:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1731:       if (isda) {
1732:         ISLocalToGlobalMapping l2l;
1733:         IS                     corners;
1734:         Mat                    lA;
1735:         PetscBool              gl,lo;

              /* first, store the DMDA coordinates in the BDDC graph, one copy per dof of each node */
1737:         {
1738:           Vec               cvec;
1739:           const PetscScalar *coords;
1740:           PetscInt          dof,n,cdim;
1741:           PetscBool         memc = PETSC_TRUE;

1743:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1744:           DMGetCoordinates(dm,&cvec);
1745:           VecGetLocalSize(cvec,&n);
1746:           VecGetBlockSize(cvec,&cdim);
                /* n becomes the number of coordinate points (local size divided by the coordinate block size) */
1747:           n   /= cdim;
1748:           PetscFree(pcbddc->mat_graph->coords);
1749:           PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1750:           VecGetArrayRead(cvec,&coords);
                /* a plain array copy is used only for real scalars and dof == 1 */
1751: #if defined(PETSC_USE_COMPLEX)
1752:           memc = PETSC_FALSE;
1753: #endif
1754:           if (dof != 1) memc = PETSC_FALSE;
1755:           if (memc) {
1756:             PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1757:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1758:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1759:             PetscInt  i, b, d;

                  /* entry (point i, dof b, direction d) receives the real part of coordinate d of point i */
1761:             for (i=0;i<n;i++) {
1762:               for (b=0;b<dof;b++) {
1763:                 for (d=0;d<cdim;d++) {
1764:                   bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1765:                 }
1766:               }
1767:             }
1768:           }
1769:           VecRestoreArrayRead(cvec,&coords);
1770:           pcbddc->mat_graph->cdim  = cdim;
1771:           pcbddc->mat_graph->cnloc = dof*n;
1772:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1773:         }
1774:         DMDAGetSubdomainCornersIS(dm,&corners);
1775:         MatISGetLocalMat(pc->pmat,&lA);
1776:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1777:         MatISRestoreLocalMat(pc->pmat,&lA);
              /* corners are used only if every process has both a mapping on the local matrix and a corner IS */
1778:         lo   = (PetscBool)(l2l && corners);
1779:         MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1780:         if (gl) { /* From PETSc's DMDA */
1781:           const PetscInt    *idx;
1782:           PetscInt          dof,bs,*idxout,n;

1784:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1785:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1786:           ISGetLocalSize(corners,&n);
1787:           ISGetIndices(corners,&idx);
                /* mapping block size equal to dof: map the n corner indices block-wise */
1788:           if (bs == dof) {
1789:             PetscMalloc1(n,&idxout);
1790:             ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1791:           } else { /* the original DMDA local-to-local map have been modified */
1792:             PetscInt i,d;

                  /* expand each corner into its dof point-wise indices, then map them in place */
1794:             PetscMalloc1(dof*n,&idxout);
1795:             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1796:             ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);

1798:             bs = 1;
1799:             n *= dof;
1800:           }
1801:           ISRestoreIndices(corners,&idx);
1802:           DMDARestoreSubdomainCornersIS(dm,&corners);
                /* corners is reused for the mapped block IS, which takes ownership of idxout */
1803:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1804:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1805:           ISDestroy(&corners);
1806:           pcbddc->corner_selected  = PETSC_TRUE;
1807:           pcbddc->corner_selection = PETSC_TRUE;
1808:         }
              /* reached with a non-NULL corners only when the gl branch above was not taken */
1809:         if (corners) {
1810:           DMDARestoreSubdomainCornersIS(dm,&corners);
1811:         }
1812:       }
1813:     }
1814:   }
        /* corner selection requested but the graph has no coordinates yet: compute them from the DM */
1815:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1816:     DM dm;

1818:     MatGetDM(pc->pmat,&dm);
1819:     if (!dm) {
1820:       PCGetDM(pc,&dm);
1821:     }
1822:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1823:       Vec            vcoords;
1824:       PetscSection   section;
1825:       PetscReal      *coords;
1826:       PetscInt       d,cdim,nl,nf,**ctxs;
1827:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1829:       DMGetCoordinateDim(dm,&cdim);
1830:       DMGetLocalSection(dm,&section);
1831:       PetscSectionGetNumFields(section,&nf);
1832:       DMCreateGlobalVector(dm,&vcoords);
1833:       VecGetLocalSize(vcoords,&nl);
1834:       PetscMalloc1(nl*cdim,&coords);
            /* one callback and one context pointer per field; all contexts live in a single buffer of nf PetscInt */
1835:       PetscMalloc2(nf,&funcs,nf,&ctxs);
1836:       PetscMalloc1(nf,&ctxs[0]);
1837:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1838:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
            /* one projection per coordinate direction d: every field evaluates X[d] */
1839:       for (d=0;d<cdim;d++) {
1840:         PetscInt          i;
1841:         const PetscScalar *v;

1843:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1844:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1845:         VecGetArrayRead(vcoords,&v);
              /* coords is interleaved: cdim consecutive entries per local vector entry */
1846:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1847:         VecRestoreArrayRead(vcoords,&v);
1848:       }
1849:       VecDestroy(&vcoords);
1850:       PCSetCoordinates(pc,cdim,nl,coords);
1851:       PetscFree(coords);
1852:       PetscFree(ctxs[0]);
1853:       PetscFree2(funcs,ctxs);
1854:     }
1855:   }
1856:   return(0);
1857: }

/* Replaces *is (local indices) with a new IS built after combining, through the star forest
   matis->sf, the membership flags of all the leaves attached to the same root.
     pc  - preconditioner whose pmat is a MATIS (its sf, sf_rootdata and sf_leafdata are used as scratch)
     mop - MPI_LAND or MPI_LOR; any other value raises PETSC_ERR_SUP
     is  - on input the IS to check; on output a new sorted IS (the input IS is destroyed)
   Flags are reduced leaf -> root with mop, broadcast back root -> leaf, and the new IS
   lists the local indices whose flag is set after the broadcast. */
1859: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1860: {
1861:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1862:   PetscErrorCode  ierr;
1863:   IS              nis;
1864:   const PetscInt  *idxs;
1865:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1866:   PetscBool       *ld;

1869:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
        /* root buffer starts from the neutral element of the reduction: true for LAND, zero for LOR */
1870:   if (mop == MPI_LAND) {
1871:     /* init rootdata with true */
1872:     ld   = (PetscBool*) matis->sf_rootdata;
1873:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1874:   } else {
1875:     PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1876:   }
        /* leaf buffer: flag the local indices contained in *is; entries outside [0,n) are ignored */
1877:   PetscArrayzero(matis->sf_leafdata,n);
1878:   ISGetLocalSize(*is,&nd);
1879:   ISGetIndices(*is,&idxs);
1880:   ld   = (PetscBool*) matis->sf_leafdata;
1881:   for (i=0;i<nd;i++)
1882:     if (-1 < idxs[i] && idxs[i] < n)
1883:       ld[idxs[i]] = PETSC_TRUE;
1884:   ISRestoreIndices(*is,&idxs);
1885:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1886:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1887:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1888:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
        /* NOTE(review): with MPI_LAND the output buffer has only nd entries; this presumably relies on
           the reduction never setting a flag that was not set locally - confirm */
1889:   if (mop == MPI_LAND) {
1890:     PetscMalloc1(nd,&nidxs);
1891:   } else {
1892:     PetscMalloc1(n,&nidxs);
1893:   }
        /* ld still points to the leaf buffer, which now holds the broadcast result */
1894:   for (i=0,nnd=0;i<n;i++)
1895:     if (ld[i])
1896:       nidxs[nnd++] = i;
        /* the new IS lives on the communicator of the old one and takes ownership of nidxs */
1897:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1898:   ISDestroy(is);
1899:   *is  = nis;
1900:   return(0);
1901: }

/* Computes z from r by an interior solve: r is scattered to the interior (D) dofs, solved
   with pcbddc->ksp_D, and the solution is scattered back into a zeroed z.
   Does nothing (z untouched) when pcbddc->benign_have_null is false.
   When pcbddc->ChangeOfBasisMatrix is set, its transpose is applied to r before the solve
   and the matrix itself is applied to the result afterwards.
     pc - the BDDC preconditioner
     r  - input vector (its contents are not modified by the calls visible here)
     z  - output vector */
1903: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1904: {
1905:   PC_IS             *pcis = (PC_IS*)(pc->data);
1906:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1907:   PetscErrorCode    ierr;

1910:   if (!pcbddc->benign_have_null) {
1911:     return(0);
1912:   }
1913:   if (pcbddc->ChangeOfBasisMatrix) {
1914:     Vec swap;

          /* after the swap the local name r refers to the transformed vector, while
             pcbddc->work_change temporarily refers to the caller's r */
1916:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1917:     swap = pcbddc->work_change;
1918:     pcbddc->work_change = r;
1919:     r = swap;
1920:   }
1921:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1922:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
        /* the solve is bracketed by the log event of the current level */
1923:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1924:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1925:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1926:   KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1927:   VecSet(z,0.);
1928:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1929:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1930:   if (pcbddc->ChangeOfBasisMatrix) {
          /* undo the swap: work_change points again to the original work vector, which is then used as input of MatMult */
1931:     pcbddc->work_change = r;
1932:     VecCopy(z,pcbddc->work_change);
1933:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1934:   }
1935:   return(0);
1936: }

/* Shared implementation of the shell MatMult / MatMultTranspose used by the benign shell matrix.
   Computes y = op(ctx->A) x (op = transpose when the flag is set), optionally modifying the
   input entries (apply_right) and/or the output entries (apply_left) listed in
   ctx->benign_zerodiag_subs[]. With transpose set, the roles of ctx->apply_left and
   ctx->apply_right are exchanged.
     A         - MATSHELL whose context is a PCBDDCBenignMatMult_ctx
     x         - input vector; when the right operation is applied, the modified copy in ctx->work
                 is temporarily placed in x for the product and x is reset afterwards
     y         - output vector
     transpose - PETSC_TRUE to multiply by the transpose of ctx->A
   NOTE(review): both loops read idxs[nz-1] without checking nz > 0; presumably every subset is non-empty - confirm. */
1938: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1939: {
1940:   PCBDDCBenignMatMult_ctx ctx;
1941:   PetscErrorCode          ierr;
1942:   PetscBool               apply_right,apply_left,reset_x;

1945:   MatShellGetContext(A,&ctx);
1946:   if (transpose) {
1947:     apply_right = ctx->apply_left;
1948:     apply_left = ctx->apply_right;
1949:   } else {
1950:     apply_right = ctx->apply_right;
1951:     apply_left = ctx->apply_left;
1952:   }
1953:   reset_x = PETSC_FALSE;
1954:   if (apply_right) {
1955:     const PetscScalar *ax;
1956:     PetscInt          nl,i;

          /* work on a copy of x so that the values stored in x are preserved */
1958:     VecGetLocalSize(x,&nl);
1959:     VecGetArrayRead(x,&ax);
1960:     PetscArraycpy(ctx->work,ax,nl);
1961:     VecRestoreArrayRead(x,&ax);
1962:     for (i=0;i<ctx->benign_n;i++) {
1963:       PetscScalar    sum,val;
1964:       const PetscInt *idxs;
1965:       PetscInt       nz,j;
1966:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1967:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* sum accumulates the original values of all but the last entry of the subset;
               with apply_p0 the original last value is also added to each of those entries */
1968:       sum = 0.;
1969:       if (ctx->apply_p0) {
1970:         val = ctx->work[idxs[nz-1]];
1971:         for (j=0;j<nz-1;j++) {
1972:           sum += ctx->work[idxs[j]];
1973:           ctx->work[idxs[j]] += val;
1974:         }
1975:       } else {
1976:         for (j=0;j<nz-1;j++) {
1977:           sum += ctx->work[idxs[j]];
1978:         }
1979:       }
            /* in both cases the last entry is decreased by the sum */
1980:       ctx->work[idxs[nz-1]] -= sum;
1981:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1982:     }
1983:     VecPlaceArray(x,ctx->work);
1984:     reset_x = PETSC_TRUE;
1985:   }
1986:   if (transpose) {
1987:     MatMultTranspose(ctx->A,x,y);
1988:   } else {
1989:     MatMult(ctx->A,x,y);
1990:   }
1991:   if (reset_x) {
1992:     VecResetArray(x);
1993:   }
1994:   if (apply_left) {
1995:     PetscScalar *ay;
1996:     PetscInt    i;

1998:     VecGetArray(y,&ay);
1999:     for (i=0;i<ctx->benign_n;i++) {
2000:       PetscScalar    sum,val;
2001:       const PetscInt *idxs;
2002:       PetscInt       nz,j;
2003:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
2004:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* the last value of the subset is subtracted from all the other entries; the last entry
               then either gains the sum of their original values (apply_p0) or is set to zero */
2005:       val = -ay[idxs[nz-1]];
2006:       if (ctx->apply_p0) {
2007:         sum = 0.;
2008:         for (j=0;j<nz-1;j++) {
2009:           sum += ay[idxs[j]];
2010:           ay[idxs[j]] += val;
2011:         }
2012:         ay[idxs[nz-1]] += sum;
2013:       } else {
2014:         for (j=0;j<nz-1;j++) {
2015:           ay[idxs[j]] += val;
2016:         }
2017:         ay[idxs[nz-1]] = 0.;
2018:       }
2019:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2020:     }
2021:     VecRestoreArray(y,&ay);
2022:   }
2023:   return(0);
2024: }

/* Transpose product of the benign shell matrix: forwards to
   PCBDDCBenignMatMult_Private_Private with transpose = PETSC_TRUE. */
2026: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2027: {

2031:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2032:   return(0);
2033: }

/* Product of the benign shell matrix: forwards to
   PCBDDCBenignMatMult_Private_Private with transpose = PETSC_FALSE. */
2035: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2036: {

2040:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2041:   return(0);
2042: }

/* Installs (restore == PETSC_FALSE) or removes (restore == PETSC_TRUE) shell replacements of
   pcis->A_IB and pcis->A_BI.
   Install: wraps the current pcis->A_IB in a MATSHELL whose products go through
   PCBDDCBenignMatMult_Private / PCBDDCBenignMatMultTranspose_Private (left operation only,
   apply_p0 off), sets pcis->A_BI to the transpose of that shell and keeps the original A_BI
   in pcbddc->benign_original_mat. Nothing is done if there is no benign change, no benign
   subsets, or the change is explicit. Raises PETSC_ERR_PLIB if a previous install has not
   been restored.
   Restore: puts back the original matrices and frees the shell context; a no-op if nothing
   was installed. */
2044: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2045: {
2046:   PC_IS                   *pcis = (PC_IS*)pc->data;
2047:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2048:   PCBDDCBenignMatMult_ctx ctx;
2049:   PetscErrorCode          ierr;

2052:   if (!restore) {
2053:     Mat                A_IB,A_BI;
2054:     PetscScalar        *work;
2055:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2057:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2058:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
          /* scratch array of pcis->n scalars, stored in the context and released on restore */
2059:     PetscMalloc1(pcis->n,&work);
          /* the shell has (n - n_B) local rows and n_B local columns */
2060:     MatCreate(PETSC_COMM_SELF,&A_IB);
2061:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2062:     MatSetType(A_IB,MATSHELL);
2063:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2064:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2065:     PetscNew(&ctx);
2066:     MatShellSetContext(A_IB,ctx);
2067:     ctx->apply_left = PETSC_TRUE;
2068:     ctx->apply_right = PETSC_FALSE;
2069:     ctx->apply_p0 = PETSC_FALSE;
2070:     ctx->benign_n = pcbddc->benign_n;
2071:     if (reuse) {
            /* borrow the index sets held by the reuse-solver data; the context does not own them */
2072:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2073:       ctx->free = PETSC_FALSE;
2074:     } else { /* TODO: could be optimized for successive solves */
2075:       ISLocalToGlobalMapping N_to_D;
2076:       PetscInt               i;

            /* renumber each subset with respect to pcis->is_I_local, dropping the indices not found there */
2078:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2079:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2080:       for (i=0;i<pcbddc->benign_n;i++) {
2081:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2082:       }
2083:       ISLocalToGlobalMappingDestroy(&N_to_D);
2084:       ctx->free = PETSC_TRUE;
2085:     }
          /* the original A_IB is kept in the context (no extra reference is taken here) and put back on restore */
2086:     ctx->A = pcis->A_IB;
2087:     ctx->work = work;
2088:     MatSetUp(A_IB);
2089:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2090:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2091:     pcis->A_IB = A_IB;

2093:     /* A_BI as A_IB^T */
2094:     MatCreateTranspose(A_IB,&A_BI);
2095:     pcbddc->benign_original_mat = pcis->A_BI;
2096:     pcis->A_BI = A_BI;
2097:   } else {
          /* benign_original_mat doubles as the "shell installed" flag */
2098:     if (!pcbddc->benign_original_mat) {
2099:       return(0);
2100:     }
          /* fetch the context before destroying the shell that references it */
2101:     MatShellGetContext(pcis->A_IB,&ctx);
2102:     MatDestroy(&pcis->A_IB);
2103:     pcis->A_IB = ctx->A;
2104:     ctx->A = NULL;
2105:     MatDestroy(&pcis->A_BI);
2106:     pcis->A_BI = pcbddc->benign_original_mat;
2107:     pcbddc->benign_original_mat = NULL;
          /* index sets are destroyed only when they were created by the install branch */
2108:     if (ctx->free) {
2109:       PetscInt i;
2110:       for (i=0;i<ctx->benign_n;i++) {
2111:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2112:       }
2113:       PetscFree(ctx->benign_zerodiag_subs);
2114:     }
2115:     PetscFree(ctx->work);
2116:     PetscFree(ctx);
2117:   }
2118:   return(0);
2119: }

/* Builds An = P^T A P with A the local matrix of pc->pmat and P = pcbddc->benign_change,
   then zeroes the rows and columns listed in pcbddc->benign_p0_lidx (benign_n of them),
   putting 1.0 on their diagonal.
     is1, is2 - if is1 is non-NULL, *B is the submatrix An(is1,is2) and An is destroyed;
                otherwise *B is An itself
     B        - output matrix, a new matrix in both cases */
2121: /* used just in bddc debug mode */
2122: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2123: {
2124:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2125:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2126:   Mat            An;

        /* 2.0 is the fill estimate passed to MatPtAP */
2130:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2131:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2132:   if (is1) {
2133:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2134:     MatDestroy(&An);
2135:   } else {
2136:     *B = An;
2137:   }
2138:   return(0);
2139: }

/* Creates a new SeqAIJ matrix containing the stored entries of A whose magnitude exceeds
   PETSC_SMALL; for a square matrix, stored diagonal entries are always kept.
     A - input matrix (its row structure and values are read through the SeqAIJ accessors)
     B - output; *B is read on entry, so it must be initialized by the caller: if it equals A,
         A is destroyed before *B is overwritten with the new matrix
   The CSR arrays of the new matrix are allocated here and handed to it (free_a / free_ij set). */
2141: /* TODO: add reuse flag */
2142: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2143: {
2144:   Mat            Bt;
2145:   PetscScalar    *a,*bdata;
2146:   const PetscInt *ii,*ij;
2147:   PetscInt       m,n,i,nnz,*bii,*bij;
2148:   PetscBool      flg_row;

        /* n = rows, m = columns */
2152:   MatGetSize(A,&n,&m);
2153:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2154:   MatSeqAIJGetArray(A,&a);
        /* upper bound on the entries kept: the large ones plus at most one small diagonal entry per row */
2155:   nnz = n;
2156:   for (i=0;i<ii[n];i++) {
2157:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2158:   }
2159:   PetscMalloc1(n+1,&bii);
2160:   PetscMalloc1(nnz,&bij);
2161:   PetscMalloc1(nnz,&bdata);
        /* second pass: nnz is reused as the running count of copied entries */
2162:   nnz = 0;
2163:   bii[0] = 0;
2164:   for (i=0;i<n;i++) {
2165:     PetscInt j;
2166:     for (j=ii[i];j<ii[i+1];j++) {
2167:       PetscScalar entry = a[j];
2168:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2169:         bij[nnz] = ij[j];
2170:         bdata[nnz] = entry;
2171:         nnz++;
2172:       }
2173:     }
2174:     bii[i+1] = nnz;
2175:   }
2176:   MatSeqAIJRestoreArray(A,&a);
2177:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2178:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
        /* make Bt release bii, bij and bdata when it is destroyed */
2179:   {
2180:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2181:     b->free_a = PETSC_TRUE;
2182:     b->free_ij = PETSC_TRUE;
2183:   }
2184:   if (*B == A) {
2185:     MatDestroy(&A);
2186:   }
2187:   *B = Bt;
2188:   return(0);
2189: }

/* Computes the connected components of a local graph with PCBDDCGraph.
     pc      - the preconditioner (pc->pmat is accessed as a MATIS)
     filter  - if true, the graph is always taken from the local matrix and entries smaller than
               PETSC_SMALL in magnitude are dropped from it
     ncc     - (optional) number of connected components
     cc      - (optional) newly allocated array of ISes, one per component
     primalv - (optional) IS of dofs found in more than one component; only produced in the
               DMPLEX branch, NULL otherwise
   All three outputs are reset (0 / NULL) on entry.
   Graph source: if a DMPLEX is attached to the matrix or the PC and filter is false, the
   cell-to-cell adjacency of the mesh is used and components are translated to dofs through
   the subdomain section; otherwise the row structure of the local matrix is used and the
   components are expressed directly in local row indices. */
2191: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2192: {
2193:   Mat                    B = NULL;
2194:   DM                     dm;
2195:   IS                     is_dummy,*cc_n;
2196:   ISLocalToGlobalMapping l2gmap_dummy;
2197:   PCBDDCGraph            graph;
2198:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2199:   PetscInt               i,n;
2200:   PetscInt               *xadj,*adjncy;
2201:   PetscBool              isplex = PETSC_FALSE;
2202:   PetscErrorCode         ierr;

2205:   if (ncc) *ncc = 0;
2206:   if (cc) *cc = NULL;
2207:   if (primalv) *primalv = NULL;
2208:   PCBDDCGraphCreate(&graph);
2209:   MatGetDM(pc->pmat,&dm);
2210:   if (!dm) {
2211:     PCGetDM(pc,&dm);
2212:   }
2213:   if (dm) {
2214:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2215:   }
        /* filtering needs matrix values, so it forces the matrix-based branch */
2216:   if (filter) isplex = PETSC_FALSE;

2218:   if (isplex) { /* this code has been modified from plexpartition.c */
2219:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2220:     PetscInt      *adj = NULL;
2221:     IS             cellNumbering;
2222:     const PetscInt *cellNum;
2223:     PetscBool      useCone, useClosure;
2224:     PetscSection   section;
2225:     PetscSegBuffer adjBuffer;
2226:     PetscSF        sfPoint;

2230:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2231:     DMGetPointSF(dm, &sfPoint);
2232:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2233:     /* Build adjacency graph via a section/segbuffer */
2234:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2235:     PetscSectionSetChart(section, pStart, pEnd);
2236:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2237:     /* Always use FVM adjacency to create partitioner graph */
          /* the current adjacency flags are saved here and put back after the loop */
2238:     DMGetBasicAdjacency(dm, &useCone, &useClosure);
2239:     DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2240:     DMPlexGetCellNumbering(dm, &cellNumbering);
          /* NOTE(review): no matching ISRestoreIndices(cellNumbering,&cellNum) appears in this function - confirm whether one is missing */
2241:     ISGetIndices(cellNumbering, &cellNum);
          /* n counts the cells that become graph vertices; each adjacent point inside [pStart,pEnd) adds one edge */
2242:     for (n = 0, p = pStart; p < pEnd; p++) {
2243:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2244:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2245:       adjSize = PETSC_DETERMINE;
2246:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2247:       for (a = 0; a < adjSize; ++a) {
2248:         const PetscInt point = adj[a];
2249:         if (pStart <= point && point < pEnd) {
2250:           PetscInt *PETSC_RESTRICT pBuf;
2251:           PetscSectionAddDof(section, p, 1);
2252:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2253:           *pBuf = point;
2254:         }
2255:       }
2256:       n++;
2257:     }
2258:     DMSetBasicAdjacency(dm, useCone, useClosure);
2259:     /* Derive CSR graph from section/segbuffer */
2260:     PetscSectionSetUp(section);
2261:     PetscSectionGetStorageSize(section, &size);
2262:     PetscMalloc1(n+1, &xadj);
          /* row offsets come from the section offsets of the cells kept above */
2263:     for (idx = 0, p = pStart; p < pEnd; p++) {
2264:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2265:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2266:     }
2267:     xadj[n] = size;
2268:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2269:     /* Clean up */
2270:     PetscSegBufferDestroy(&adjBuffer);
2271:     PetscSectionDestroy(&section);
2272:     PetscFree(adj);
2273:     graph->xadj = xadj;
2274:     graph->adjncy = adjncy;
2275:   } else {
2276:     Mat       A;
2277:     PetscBool isseqaij, flg_row;

2279:     MatISGetLocalMat(pc->pmat,&A);
          /* empty local matrix: nothing to compute, outputs keep the values set on entry */
2280:     if (!A->rmap->N || !A->cmap->N) {
2281:       PCBDDCGraphDestroy(&graph);
2282:       return(0);
2283:     }
2284:     PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
          /* B is the matrix whose row structure defines the graph: a converted copy when filtering a
             non-AIJ matrix, otherwise A itself with an extra reference */
2285:     if (!isseqaij && filter) {
2286:       PetscBool isseqdense;

2288:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2289:       if (!isseqdense) {
2290:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2291:       } else { /* TODO: rectangular case and LDA */
2292:         PetscScalar *array;
2293:         PetscReal   chop=1.e-6;

              /* dense case: zero the off-diagonal pairs (i,j),(j,i) that are smaller than
                 chop*(|a_ii|+|a_jj|); the indexing treats the array as n x n with leading dimension n */
2295:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2296:         MatDenseGetArray(B,&array);
2297:         MatGetSize(B,&n,NULL);
2298:         for (i=0;i<n;i++) {
2299:           PetscInt j;
2300:           for (j=i+1;j<n;j++) {
2301:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2302:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2303:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2304:           }
2305:         }
2306:         MatDenseRestoreArray(B,&array);
2307:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2308:       }
2309:     } else {
2310:       PetscObjectReference((PetscObject)A);
2311:       B = A;
2312:     }
          /* third argument PETSC_TRUE is the "symmetric" flag of MatGetRowIJ */
2313:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2315:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2316:     if (filter) {
2317:       PetscScalar *data;
2318:       PetscInt    j,cum;

            /* xadj_filtered[i] is first used as the per-row counter (zeroed by the calloc) and then
               overwritten with the row offset cum.
               NOTE(review): xadj_filtered[n] is never assigned and keeps the value 0; if the graph code
               reads xadj[n] as the end of the last row it should presumably hold cum - confirm */
2320:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2321:       MatSeqAIJGetArray(B,&data);
2322:       cum = 0;
2323:       for (i=0;i<n;i++) {
2324:         PetscInt t;

2326:         for (j=xadj[i];j<xadj[i+1];j++) {
2327:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2328:             continue;
2329:           }
2330:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2331:         }
2332:         t = xadj_filtered[i];
2333:         xadj_filtered[i] = cum;
2334:         cum += t;
2335:       }
2336:       MatSeqAIJRestoreArray(B,&data);
2337:       graph->xadj = xadj_filtered;
2338:       graph->adjncy = adjncy_filtered;
2339:     } else {
2340:       graph->xadj = xadj;
2341:       graph->adjncy = adjncy;
2342:     }
2343:   }
2344:   /* compute local connected components using PCBDDCGraph */
        /* the graph is initialized with an identity mapping over the n local vertices */
2345:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2346:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2347:   ISDestroy(&is_dummy);
2348:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2349:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2350:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2351:   PCBDDCGraphComputeConnectedComponents(graph);

2353:   /* partial clean up */
        /* graph->xadj / graph->adjncy still point to these arrays; they are set to NULL below before the graph is destroyed */
2354:   PetscFree2(xadj_filtered,adjncy_filtered);
2355:   if (B) {
2356:     PetscBool flg_row;
2357:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2358:     MatDestroy(&B);
2359:   }
2360:   if (isplex) {
2361:     PetscFree(xadj);
2362:     PetscFree(adjncy);
2363:   }

2365:   /* get back data */
2366:   if (isplex) {
2367:     if (ncc) *ncc = graph->ncc;
2368:     if (cc || primalv) {
2369:       Mat          A;
2370:       PetscBT      btv,btvt;
2371:       PetscSection subSection;
2372:       PetscInt     *ids,cum,cump,*cids,*pids;

            /* ids/cids: dofs grouped per component (CSR-like, cids holds the offsets);
               pids: dofs met again in a later component;
               btv marks dofs assigned to a previous component, btvt dofs already seen in the current one */
2374:       DMPlexGetSubdomainSection(dm,&subSection);
2375:       MatISGetLocalMat(pc->pmat,&A);
2376:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2377:       PetscBTCreate(A->rmap->n,&btv);
2378:       PetscBTCreate(A->rmap->n,&btvt);

2380:       cids[0] = 0;
2381:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2382:         PetscInt j;

2384:         PetscBTMemzero(A->rmap->n,btvt);
              /* visit the closure of every cell of component i and collect the unconstrained dofs of its points */
2385:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2386:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2388:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
                /* the closure array is walked with stride 2; only the even entries are used as points */
2389:           for (k = 0; k < 2*size; k += 2) {
2390:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2392:             PetscSectionGetConstraintDof(subSection,p,&cdof);
2393:             PetscSectionGetOffset(subSection,p,&off);
2394:             PetscSectionGetDof(subSection,p,&dof);
2395:             for (s = 0; s < dof-cdof; s++) {
2396:               if (PetscBTLookupSet(btvt,off+s)) continue;
2397:               if (!PetscBTLookup(btv,off+s)) {
2398:                 ids[cum++] = off+s;
2399:               } else { /* cross-vertex */
2400:                 pids[cump++] = off+s;
2401:               }
2402:             }
                  /* same treatment for the tree parent of p, when it differs from p */
2403:             DMPlexGetTreeParent(dm,p,&pp,NULL);
2404:             if (pp != p) {
2405:               PetscSectionGetConstraintDof(subSection,pp,&cdof);
2406:               PetscSectionGetOffset(subSection,pp,&off);
2407:               PetscSectionGetDof(subSection,pp,&dof);
2408:               for (s = 0; s < dof-cdof; s++) {
2409:                 if (PetscBTLookupSet(btvt,off+s)) continue;
2410:                 if (!PetscBTLookup(btv,off+s)) {
2411:                   ids[cum++] = off+s;
2412:                 } else { /* cross-vertex */
2413:                   pids[cump++] = off+s;
2414:                 }
2415:               }
2416:             }
2417:           }
2418:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2419:         }
2420:         cids[i+1] = cum;
2421:         /* mark dofs as already assigned */
2422:         for (j = cids[i]; j < cids[i+1]; j++) {
2423:           PetscBTSet(btv,ids[j]);
2424:         }
2425:       }
            /* outputs copy their indices, so the scratch arrays can be freed right after */
2426:       if (cc) {
2427:         PetscMalloc1(graph->ncc,&cc_n);
2428:         for (i = 0; i < graph->ncc; i++) {
2429:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2430:         }
2431:         *cc = cc_n;
2432:       }
2433:       if (primalv) {
2434:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2435:       }
2436:       PetscFree3(ids,cids,pids);
2437:       PetscBTDestroy(&btv);
2438:       PetscBTDestroy(&btvt);
2439:     }
2440:   } else {
          /* matrix-based graph: each component is the slice [cptr[i],cptr[i+1]) of graph->queue */
2441:     if (ncc) *ncc = graph->ncc;
2442:     if (cc) {
2443:       PetscMalloc1(graph->ncc,&cc_n);
2444:       for (i=0;i<graph->ncc;i++) {
2445:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2446:       }
2447:       *cc = cc_n;
2448:     }
2449:   }
2450:   /* clean up graph */
        /* the CSR arrays were released or restored above, so the graph must not keep pointers to them */
2451:   graph->xadj = NULL;
2452:   graph->adjncy = NULL;
2453:   PCBDDCGraphDestroy(&graph);
2454:   return(0);
2455: }

/*
   PCBDDCBenignCheck - Consistency checks for the "benign trick" data.

   Input Parameters:
+  pc       - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  zerodiag - local indices of the zero-diagonal (pressure) dofs, or NULL to skip the local checks

   What is checked:
   1. (only if zerodiag is given) With p0 the indicator vector of zerodiag and v_I a random
      vector zeroed on zerodiag, on the interface dofs (pcis->is_B_local) and on the Dirichlet
      dofs returned by PCBDDCGraphGetDirichletDofs(), the value (A p0, v_I) computed with the
      local matrix of pc->pmat must not exceed 1.e-1 in absolute value.
   2. (only if zerodiag is given) No index of zerodiag may belong to pcis->is_B_local.
   3. A round trip through PCBDDCBenignGetOrSetP0(): known values are stored in
      pcbddc->benign_p0, pushed into pcis->vec1_global, overwritten locally, and read back.

   Errors:
   PETSC_ERR_SUP for checks 1 and 2, PETSC_ERR_PLIB for check 3.

   Notes:
   pcis->vec1_N, pcis->vec2_N, pcis->vec1_global and pcbddc->benign_p0 are overwritten.
*/
2457: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2458: {
2459:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2460:   PC_IS*         pcis = (PC_IS*)(pc->data);
2461:   IS             dirIS = NULL;
2462:   PetscInt       i;

2466:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2467:   if (zerodiag) {
2468:     Mat            A;
2469:     Vec            vec3_N;
2470:     PetscScalar    *vals;
2471:     const PetscInt *idxs;
2472:     PetscInt       nz,*count;

2474:     /* p0 */
2475:     VecSet(pcis->vec1_N,0.);
            /* vals is a scratch buffer of size pcis->n, reused for every VecSetValues below */
2476:     PetscMalloc1(pcis->n,&vals);
2477:     ISGetLocalSize(zerodiag,&nz);
2478:     ISGetIndices(zerodiag,&idxs);
2479:     for (i=0;i<nz;i++) vals[i] = 1.;
2480:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2481:     VecAssemblyBegin(pcis->vec1_N);
2482:     VecAssemblyEnd(pcis->vec1_N);
2483:     /* v_I */
2484:     VecSetRandom(pcis->vec2_N,NULL);
            /* zero the random vector on the pressure dofs ... */
2485:     for (i=0;i<nz;i++) vals[i] = 0.;
2486:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2487:     ISRestoreIndices(zerodiag,&idxs);
            /* ... on the interface dofs ... */
2488:     ISGetIndices(pcis->is_B_local,&idxs);
2489:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2490:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2491:     ISRestoreIndices(pcis->is_B_local,&idxs);
            /* ... and on the Dirichlet dofs, if any */
2492:     if (dirIS) {
2493:       PetscInt n;

2495:       ISGetLocalSize(dirIS,&n);
2496:       ISGetIndices(dirIS,&idxs);
2497:       for (i=0;i<n;i++) vals[i] = 0.;
2498:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2499:       ISRestoreIndices(dirIS,&idxs);
2500:     }
2501:     VecAssemblyBegin(pcis->vec2_N);
2502:     VecAssemblyEnd(pcis->vec2_N);
2503:     VecDuplicate(pcis->vec1_N,&vec3_N);
2504:     VecSet(vec3_N,0.);
2505:     MatISGetLocalMat(pc->pmat,&A);
            /* vals[0] receives (A p0, v_I) */
2506:     MatMult(A,pcis->vec1_N,vec3_N);
2507:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
            /* the value is cast to double: it is consumed by a variadic %e conversion */
2508:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2509:     PetscFree(vals);
2510:     VecDestroy(&vec3_N);

2512:     /* there should not be any pressure dofs lying on the interface */
2513:     PetscCalloc1(pcis->n,&count);
2514:     ISGetIndices(pcis->is_B_local,&idxs);
2515:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2516:     ISRestoreIndices(pcis->is_B_local,&idxs);
2517:     ISGetIndices(zerodiag,&idxs);
2518:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2519:     ISRestoreIndices(zerodiag,&idxs);
2520:     PetscFree(count);
2521:   }
2522:   ISDestroy(&dirIS);

2524:   /* check PCBDDCBenignGetOrSetP0 */
2525:   VecSetRandom(pcis->vec1_global,NULL);
          /* store a recognizable value per p0 dof and push it into the global vector (get = PETSC_FALSE) */
2526:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2527:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
          /* clobber the local copy, then read it back from the global vector (get = PETSC_TRUE) */
2528:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2529:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2530:   for (i=0;i<pcbddc->benign_n;i++) {
2531:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
            /* both %g arguments must be double: the expected value is an integer expression and is cast explicitly */
2532:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2533:   }
2534:   return(0);
2535: }

/*
   PCBDDCBenignDetectSaddlePoint - Detects the local zero-diagonal (pressure) dofs usable by the
   benign trick and builds the corresponding local change of basis.

   Input Parameters:
+  pc    - the preconditioner (pc->data is a PC_BDDC)
-  reuse - if true, the detection phase is skipped and the function jumps directly to the
           construction of the change of basis, using the pcbddc->benign_n and
           pcbddc->benign_zerodiag_subs already stored in pcbddc

   Output Parameter:
.  zerodiaglocal - set to the index set of accepted zero-diagonal dofs; it is NULL when reuse is
                   true or when no valid set has been found. The function does not destroy this
                   reference (presumably the caller owns it - confirm against the caller).

   Fields of pcbddc written here:
   benign_sf, benign_B0 (destroyed), benign_zerodiag_subs, benign_n, benign_null, benign_have_null,
   benign_change, benign_p0_lidx, benign_p0_gidx, benign_p0, divudotp (only if compute_nonetflux is
   set and divudotp is not yet available), benign_change_explicit (forced to true when the local
   size is zero) and local_mat (replaced by the projected matrix when benign_change_explicit).

   Notes:
   Collective: MPIU_Allreduce and MPI_Scan are called on the communicator of pc.
*/
2537: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2538: {
2539:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2540:   IS             pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2541:   PetscInt       nz,n,benign_n,bsp = 1;
2542:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2543:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

        /* with reuse, everything up to the project_b0 label is skipped; zerodiag stays NULL */
2547:   if (reuse) goto project_b0;
        /* discard the data computed by a previous detection */
2548:   PetscSFDestroy(&pcbddc->benign_sf);
2549:   MatDestroy(&pcbddc->benign_B0);
2550:   for (n=0;n<pcbddc->benign_n;n++) {
2551:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2552:   }
2553:   PetscFree(pcbddc->benign_zerodiag_subs);
2554:   has_null_pressures = PETSC_TRUE;
2555:   have_null = PETSC_TRUE;
2556:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2557:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2558:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2559:      If not, a change of basis on pressures is not needed
2560:      since the local Schur complements are already SPD
2561:   */
2562:   if (pcbddc->n_ISForDofsLocal) {
2563:     IS        iP = NULL;
2564:     PetscInt  p,*pp;
2565:     PetscBool flg;

2567:     PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2568:     n    = pcbddc->n_ISForDofsLocal;
2569:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2570:     PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2571:     PetscOptionsEnd();
          /* option not given: the last field is taken as the only pressure field */
2572:     if (!flg) {
2573:       n = 1;
2574:       pp[0] = pcbddc->n_ISForDofsLocal-1;
2575:     }

          /* first pass: validate the field ids and count the total number of pressure components (sum of block sizes) */
2577:     bsp = 0;
2578:     for (p=0;p<n;p++) {
2579:       PetscInt bs;

2581:       if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2582:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2583:       bsp += bs;
2584:     }
2585:     PetscMalloc1(bsp,&bzerodiag);
          /* second pass: create one index set per component by taking every bs-th index of the field */
2586:     bsp  = 0;
2587:     for (p=0;p<n;p++) {
2588:       const PetscInt *idxs;
2589:       PetscInt       b,bs,npl,*bidxs;

2591:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2592:       ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2593:       ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2594:       PetscMalloc1(npl/bs,&bidxs);
2595:       for (b=0;b<bs;b++) {
2596:         PetscInt i;

2598:         for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2599:         ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2600:         bsp++;
2601:       }
2602:       PetscFree(bidxs);
2603:       ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2604:     }
          /* pressures = concatenation of all the per-component index sets */
2605:     ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);

2607:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2608:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2609:     if (iP) {
2610:       IS newpressures;

2612:       ISDifference(pressures,iP,&newpressures);
2613:       ISDestroy(&pressures);
2614:       pressures = newpressures;
2615:     }
2616:     ISSorted(pressures,&sorted);
2617:     if (!sorted) {
2618:       ISSort(pressures);
2619:     }
2620:     PetscFree(pp);
2621:   }

2623:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2624:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2625:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2626:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2627:   ISSorted(zerodiag,&sorted);
2628:   if (!sorted) {
2629:     ISSort(zerodiag);
2630:   }
        /* keep a second reference to the full zero-diagonal set: zerodiag may be destroyed or
           replaced below, while zerodiag_save is still needed to build divudotp */
2631:   PetscObjectReference((PetscObject)zerodiag);
2632:   zerodiag_save = zerodiag;
2633:   ISGetLocalSize(zerodiag,&nz);
2634:   if (!nz) {
2635:     if (n) have_null = PETSC_FALSE;
2636:     has_null_pressures = PETSC_FALSE;
2637:     ISDestroy(&zerodiag);
2638:   }
2639:   recompute_zerodiag = PETSC_FALSE;

2641:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2642:   zerodiag_subs    = NULL;
2643:   benign_n         = 0;
2644:   n_interior_dofs  = 0;
2645:   interior_dofs    = NULL;
2646:   nneu             = 0;
2647:   if (pcbddc->NeumannBoundariesLocal) {
2648:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2649:   }
2650:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2651:   if (checkb) { /* need to compute interior nodes */
2652:     PetscInt n,i,j;
2653:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2654:     PetscInt *iwork;

2656:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2657:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2658:     PetscCalloc1(n,&iwork);
2659:     PetscMalloc1(n,&interior_dofs);
          /* count, for each local dof, how many neighbours (entries 1..n_neigh-1) share it;
             dofs with a zero count are collected as interior dofs */
2660:     for (i=1;i<n_neigh;i++)
2661:       for (j=0;j<n_shared[i];j++)
2662:           iwork[shared[i][j]] += 1;
2663:     for (i=0;i<n;i++)
2664:       if (!iwork[i])
2665:         interior_dofs[n_interior_dofs++] = i;
2666:     PetscFree(iwork);
2667:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2668:   }
2669:   if (has_null_pressures) {
2670:     IS             *subs;
2671:     PetscInt       nsubs,i,j,nl;
2672:     const PetscInt *idxs;
2673:     PetscScalar    *array;
2674:     Vec            *work;
2675:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2677:     subs  = pcbddc->local_subs;
2678:     nsubs = pcbddc->n_local_subs;
2679:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2680:     if (checkb) {
2681:       VecDuplicateVecs(matis->y,2,&work);
2682:       ISGetLocalSize(zerodiag,&nl);
2683:       ISGetIndices(zerodiag,&idxs);
2684:       /* work[0] = 1_p */
2685:       VecSet(work[0],0.);
2686:       VecGetArray(work[0],&array);
2687:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2688:       VecRestoreArray(work[0],&array);
2689:       /* work[1] = 1_v */
2690:       VecSet(work[1],1.);
2691:       VecGetArray(work[1],&array);
2692:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2693:       VecRestoreArray(work[1],&array);
2694:       ISRestoreIndices(zerodiag,&idxs);
2695:     }

          /* several local subdomains and/or several pressure components: each (component, subdomain)
             pair is tested separately and becomes its own entry of zerodiag_subs when valid */
2697:     if (nsubs > 1 || bsp > 1) {
2698:       IS       *is;
2699:       PetscInt b,totb;

2701:       totb  = bsp;
2702:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2703:       nsubs = PetscMax(nsubs,1);
2704:       PetscCalloc1(nsubs*totb,&zerodiag_subs);
2705:       for (b=0;b<totb;b++) {
2706:         for (i=0;i<nsubs;i++) {
2707:           ISLocalToGlobalMapping l2g;
2708:           IS                     t_zerodiag_subs;
2709:           PetscInt               nl;

                /* l2g maps the numbering of subdomain i to the local numbering; without subdomain
                   information an identity mapping over the whole local matrix is used */
2711:           if (subs) {
2712:             ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2713:           } else {
2714:             IS tis;

2716:             MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2717:             ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2718:             ISLocalToGlobalMappingCreateIS(tis,&l2g);
2719:             ISDestroy(&tis);
2720:           }
                /* restrict the b-th component to subdomain i (indices outside are dropped) */
2721:           ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2722:           ISGetLocalSize(t_zerodiag_subs,&nl);
2723:           if (nl) {
2724:             PetscBool valid = PETSC_TRUE;

                  /* test 1: A applied to the pressure indicator restricted to subdomain i must
                     vanish (up to PETSC_SMALL) on the interior non-pressure dofs */
                  /* NOTE(review): subs[i] is dereferenced here without the NULL guard used when
                     creating l2g above; confirm that checkb cannot be true while subs is NULL */
2726:             if (checkb) {
2727:               VecSet(matis->x,0);
2728:               ISGetLocalSize(subs[i],&nl);
2729:               ISGetIndices(subs[i],&idxs);
2730:               VecGetArray(matis->x,&array);
2731:               for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2732:               VecRestoreArray(matis->x,&array);
2733:               ISRestoreIndices(subs[i],&idxs);
2734:               VecPointwiseMult(matis->x,work[0],matis->x);
2735:               MatMult(matis->A,matis->x,matis->y);
2736:               VecPointwiseMult(matis->y,work[1],matis->y);
2737:               VecGetArray(matis->y,&array);
2738:               for (j=0;j<n_interior_dofs;j++) {
2739:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2740:                   valid = PETSC_FALSE;
2741:                   break;
2742:                 }
2743:               }
2744:               VecRestoreArray(matis->y,&array);
2745:             }
                  /* test 2: the subdomain must not contain any dof of NeumannBoundariesLocal */
2746:             if (valid && nneu) {
2747:               const PetscInt *idxs;
2748:               PetscInt       nzb;

2750:               ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2751:               ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2752:               ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2753:               if (nzb) valid = PETSC_FALSE;
2754:             }
                  /* test 3: the size of the embedding of the zero-diagonal set into the pressure set
                     of this subdomain must equal the size of the zero-diagonal set itself */
2755:             if (valid && pressures) {
2756:               IS       t_pressure_subs,tmp;
2757:               PetscInt i1,i2;

2759:               ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2760:               ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2761:               ISGetLocalSize(tmp,&i1);
2762:               ISGetLocalSize(t_zerodiag_subs,&i2);
2763:               if (i2 != i1) valid = PETSC_FALSE;
2764:               ISDestroy(&t_pressure_subs);
2765:               ISDestroy(&tmp);
2766:             }
                  /* accepted sets are stored in local numbering; a rejection means the global
                     zerodiag must be rebuilt from the accepted pieces only */
2767:             if (valid) {
2768:               ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2769:               benign_n++;
2770:             } else recompute_zerodiag = PETSC_TRUE;
2771:           }
2772:           ISDestroy(&t_zerodiag_subs);
2773:           ISLocalToGlobalMappingDestroy(&l2g);
2774:         }
2775:       }
2776:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2777:       PetscBool valid = PETSC_TRUE;

            /* same three tests as above, applied to the whole local problem */
2779:       if (nneu) valid = PETSC_FALSE;
2780:       if (valid && pressures) {
2781:         ISEqual(pressures,zerodiag,&valid);
2782:       }
2783:       if (valid && checkb) {
2784:         MatMult(matis->A,work[0],matis->x);
2785:         VecPointwiseMult(matis->x,work[1],matis->x);
2786:         VecGetArray(matis->x,&array);
2787:         for (j=0;j<n_interior_dofs;j++) {
2788:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2789:             valid = PETSC_FALSE;
2790:             break;
2791:           }
2792:         }
2793:         VecRestoreArray(matis->x,&array);
2794:       }
2795:       if (valid) {
2796:         benign_n = 1;
2797:         PetscMalloc1(benign_n,&zerodiag_subs);
              /* zerodiag_subs[0] shares the zerodiag object, hence the extra reference */
2798:         PetscObjectReference((PetscObject)zerodiag);
2799:         zerodiag_subs[0] = zerodiag;
2800:       }
2801:     }
2802:     if (checkb) {
2803:       VecDestroyVecs(2,&work);
2804:     }
2805:   }
2806:   PetscFree(interior_dofs);

        /* nothing accepted: drop zerodiag; a non-empty local problem then has no null pressures */
2808:   if (!benign_n) {
2809:     PetscInt n;

2811:     ISDestroy(&zerodiag);
2812:     recompute_zerodiag = PETSC_FALSE;
2813:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2814:     if (n) have_null = PETSC_FALSE;
2815:   }

2817:   /* final check for null pressures */
2818:   if (zerodiag && pressures) {
2819:     ISEqual(pressures,zerodiag,&have_null);
2820:   }

        /* some (component, subdomain) pair has been rejected: rebuild zerodiag as the sorted
           union of the accepted index sets */
2822:   if (recompute_zerodiag) {
2823:     ISDestroy(&zerodiag);
2824:     if (benign_n == 1) {
2825:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2826:       zerodiag = zerodiag_subs[0];
2827:     } else {
2828:       PetscInt i,nzn,*new_idxs;

2830:       nzn = 0;
2831:       for (i=0;i<benign_n;i++) {
2832:         PetscInt ns;
2833:         ISGetLocalSize(zerodiag_subs[i],&ns);
2834:         nzn += ns;
2835:       }
2836:       PetscMalloc1(nzn,&new_idxs);
2837:       nzn = 0;
2838:       for (i=0;i<benign_n;i++) {
2839:         PetscInt ns,*idxs;
2840:         ISGetLocalSize(zerodiag_subs[i],&ns);
2841:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2842:         PetscArraycpy(new_idxs+nzn,idxs,ns);
2843:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2844:         nzn += ns;
2845:       }
2846:       PetscSortInt(nzn,new_idxs);
            /* new_idxs is handed over to the IS (PETSC_OWN_POINTER), so it is not freed here */
2847:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2848:     }
2849:     have_null = PETSC_FALSE;
2850:   }

2852:   /* determines if the coarse solver will be singular or not */
2853:   MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));

2855:   /* Prepare matrix to compute no-net-flux */
2856:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2857:     Mat                    A,loc_divudotp;
2858:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2859:     IS                     row,col,isused = NULL;
2860:     PetscInt               M,N,n,st,n_isused;

          /* rows are taken from the user-specified pressures when available, otherwise from the
             full set of zero-diagonal dofs */
2862:     if (pressures) {
2863:       isused = pressures;
2864:     } else {
2865:       isused = zerodiag_save;
2866:     }
2867:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2868:     MatISGetLocalMat(pc->pmat,&A);
2869:     MatGetLocalSize(A,&n,NULL);
2870:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2871:     n_isused = 0;
2872:     if (isused) {
2873:       ISGetLocalSize(isused,&n_isused);
2874:     }
          /* inclusive prefix sum minus the local contribution: st is the first global row index of this process */
2875:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2876:     st = st-n_isused;
2877:     if (n) {
2878:       const PetscInt *gidxs;

2880:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2881:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2882:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2883:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2884:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2885:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2886:     } else {
            /* empty local problem: contribute an empty local matrix and an empty column set */
2887:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2888:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2889:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2890:     }
2891:     MatGetSize(pc->pmat,NULL,&N);
2892:     ISGetSize(row,&M);
2893:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2894:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2895:     ISDestroy(&row);
2896:     ISDestroy(&col);
          /* divudotp is a MATIS of global size M x N whose local matrix is loc_divudotp */
2897:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2898:     MatSetType(pcbddc->divudotp,MATIS);
2899:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2900:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2901:     ISLocalToGlobalMappingDestroy(&rl2g);
2902:     ISLocalToGlobalMappingDestroy(&cl2g);
2903:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2904:     MatDestroy(&loc_divudotp);
2905:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2906:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2907:   }
        /* release the temporaries of the detection phase */
2908:   ISDestroy(&zerodiag_save);
2909:   ISDestroy(&pressures);
2910:   if (bzerodiag) {
2911:     PetscInt i;

2913:     for (i=0;i<bsp;i++) {
2914:       ISDestroy(&bzerodiag[i]);
2915:     }
2916:     PetscFree(bzerodiag);
2917:   }
        /* hand the detected sets over to pcbddc */
2918:   pcbddc->benign_n = benign_n;
2919:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2921:   /* determines if the problem has subdomains with 0 pressure block */
2922:   have_null = (PetscBool)(!!pcbddc->benign_n);
2923:   MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

      /* entry point when reuse is true: only the change of basis and the p0 indices are rebuilt */
2925: project_b0:
2926:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2927:   /* change of basis and p0 dofs */
2928:   if (pcbddc->benign_n) {
2929:     PetscInt i,s,*nnz;

2931:     /* local change of basis for pressures */
2932:     MatDestroy(&pcbddc->benign_change);
2933:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2934:     MatSetType(pcbddc->benign_change,MATAIJ);
2935:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2936:     PetscMalloc1(n,&nnz);
2937:     for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2938:     for (i=0;i<pcbddc->benign_n;i++) {
2939:       const PetscInt *idxs;
2940:       PetscInt       nzs,j;

2942:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2943:       ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2944:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2945:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2946:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2947:     }
2948:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2949:     MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2950:     PetscFree(nnz);
2951:     /* set identity by default */
2952:     for (i=0;i<n;i++) {
2953:       MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2954:     }
          /* one p0 dof (local index, global index, value) per accepted set */
2955:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2956:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2957:     /* set change on pressures */
2958:     for (s=0;s<pcbddc->benign_n;s++) {
2959:       PetscScalar    *array;
2960:       const PetscInt *idxs;
2961:       PetscInt       nzs;

2963:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2964:       ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
            /* rows of the first nzs-1 pressure dofs: 1 on the diagonal and 1 in the column of the last dof */
2965:       for (i=0;i<nzs-1;i++) {
2966:         PetscScalar vals[2];
2967:         PetscInt    cols[2];

2969:         cols[0] = idxs[i];
2970:         cols[1] = idxs[nzs-1];
2971:         vals[0] = 1.;
2972:         vals[1] = 1.;
2973:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2974:       }
            /* row of the last pressure dof: -1 in the columns of the other dofs of the set, 1 on the diagonal */
2975:       PetscMalloc1(nzs,&array);
2976:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2977:       array[nzs-1] = 1.;
2978:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2979:       /* store local idxs for p0 */
2980:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2981:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2982:       PetscFree(array);
2983:     }
2984:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2985:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);

2987:     /* project if needed */
2988:     if (pcbddc->benign_change_explicit) {
2989:       Mat M;

            /* local_mat is replaced by the compressed form of P^T * local_mat * P, with P = benign_change */
2991:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2992:       MatDestroy(&pcbddc->local_mat);
2993:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2994:       MatDestroy(&M);
2995:     }
2996:     /* store global idxs for p0 */
2997:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2998:   }
        /* NULL when reuse is true or when no valid zero-diagonal set has been found */
2999:   *zerodiaglocal = zerodiag;
3000:   return(0);
3001: }

/*
   PCBDDCBenignGetOrSetP0 - Moves the p0 values between pcbddc->benign_p0 and a vector.

   Input Parameters:
+  pc  - the preconditioner (pc->data is a PC_BDDC)
.  v   - the vector; read when get is true, written when get is false
-  get - if true, the entries of v are broadcast into pcbddc->benign_p0;
         if false, pcbddc->benign_p0 is reduced (with MPI_REPLACE) into the entries of v

   Notes:
   pcbddc->benign_sf is created on first use from the row layout of pc->pmat, with
   pcbddc->benign_n leaves addressed by the global indices pcbddc->benign_p0_gidx.
*/
3003: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3004: {
3005:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3006:   PetscScalar    *array;

        /* lazily build the star forest connecting the local p0 dofs with their global locations */
3010:   if (!pcbddc->benign_sf) {
3011:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3012:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3013:   }
3014:   if (get) {
          /* get: v is only read (root data), benign_p0 receives the values (leaf data) */
3015:     VecGetArrayRead(v,(const PetscScalar**)&array);
3016:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
3017:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
3018:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
3019:   } else {
          /* set: benign_p0 (leaf data) overwrites the corresponding entries of v (root data) */
3020:     VecGetArray(v,&array);
3021:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
3022:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
3023:     VecRestoreArray(v,&array);
3024:   }
3025:   return(0);
3026: }

/*
   PCBDDCBenignPopOrPushB0 - Extracts (pop) the B0 rows associated with the p0 dofs into
   pcbddc->benign_B0, or reinserts them (push) into pcbddc->local_mat.

   Input Parameters:
+  pc  - the preconditioner (pc->data is a PC_BDDC)
-  pop - PETSC_TRUE to pop, PETSC_FALSE to push

   Behaviour:
   - returns immediately when pcbddc->benign_n is zero
   - pop,  benign_change_explicit set:   benign_B0 is the submatrix of local_mat made of the rows
                                         benign_p0_lidx (reused if it already exists); those rows
                                         and columns of local_mat are then zeroed, with 1.0 on the diagonal
   - pop,  benign_change_explicit unset: row i of benign_B0 holds the entries, larger than PETSC_SMALL
                                         in absolute value, of matis->A times the indicator vector of
                                         benign_zerodiag_subs[i]; local_mat is not touched
   - push, benign_change_explicit set:   every row of benign_B0 is written back to local_mat as both the
                                         row and the column of the corresponding p0 dof, and the
                                         diagonal entry is set to 0.0
   - push, benign_change_explicit unset: raises PETSC_ERR_PLIB
*/
3028: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3029: {
3030:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

3034:   /* TODO: add error checking
3035:     - avoid nested pop (or push) calls.
3036:     - cannot push before pop.
3037:     - cannot call this if pcbddc->local_mat is NULL
3038:   */
3039:   if (!pcbddc->benign_n) {
3040:     return(0);
3041:   }
3042:   if (pop) {
3043:     if (pcbddc->benign_change_explicit) {
3044:       IS       is_p0;
3045:       MatReuse reuse;

3047:       /* extract B_0 */
3048:       reuse = MAT_INITIAL_MATRIX;
3049:       if (pcbddc->benign_B0) {
3050:         reuse = MAT_REUSE_MATRIX;
3051:       }
3052:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3053:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3054:       /* remove rows and cols from local problem */
3055:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3056:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3057:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3058:       ISDestroy(&is_p0);
3059:     } else {
3060:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
3061:       PetscScalar *vals;
3062:       PetscInt    i,n,*idxs_ins;

3064:       VecGetLocalSize(matis->y,&n);
            /* scratch buffers of size n: column indices and values of one row of B0 */
3065:       PetscMalloc2(n,&idxs_ins,n,&vals);
3066:       if (!pcbddc->benign_B0) {
3067:         PetscInt *nnz;
3068:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3069:         MatSetType(pcbddc->benign_B0,MATAIJ);
3070:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3071:         PetscMalloc1(pcbddc->benign_n,&nnz);
              /* row i is preallocated with n minus the size of benign_zerodiag_subs[i] entries */
3072:         for (i=0;i<pcbddc->benign_n;i++) {
3073:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3074:           nnz[i] = n - nnz[i];
3075:         }
3076:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3077:         MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3078:         PetscFree(nnz);
3079:       }

3081:       for (i=0;i<pcbddc->benign_n;i++) {
3082:         PetscScalar *array;
3083:         PetscInt    *idxs,j,nz,cum;

              /* matis->x = indicator vector of the i-th zero-diagonal set */
3085:         VecSet(matis->x,0.);
3086:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3087:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3088:         for (j=0;j<nz;j++) vals[j] = 1.;
3089:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3090:         VecAssemblyBegin(matis->x);
3091:         VecAssemblyEnd(matis->x);
3092:         VecSet(matis->y,0.);
3093:         MatMult(matis->A,matis->x,matis->y);
3094:         VecGetArray(matis->y,&array);
              /* keep only the entries of A*x that are not numerically zero; vals is reused as output buffer */
3095:         cum = 0;
3096:         for (j=0;j<n;j++) {
3097:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3098:             vals[cum] = array[j];
3099:             idxs_ins[cum] = j;
3100:             cum++;
3101:           }
3102:         }
3103:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3104:         VecRestoreArray(matis->y,&array);
3105:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3106:       }
3107:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3108:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3109:       PetscFree2(idxs_ins,vals);
3110:     }
3111:   } else { /* push */
3112:     if (pcbddc->benign_change_explicit) {
3113:       PetscInt i;

3115:       for (i=0;i<pcbddc->benign_n;i++) {
3116:         PetscScalar *B0_vals;
3117:         PetscInt    *B0_cols,B0_ncol;

3119:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
              /* write row i of B0 as the row, then as the column, of the i-th p0 dof */
3120:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3121:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
              /* the diagonal entry of the p0 dof is explicitly set to zero */
3122:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3123:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3124:       }
3125:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3126:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3127:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3128:   }
3129:   return(0);
3130: }

3132: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3133: {
3134:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3135:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3136:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3137:   PetscBLASInt    *B_iwork,*B_ifail;
3138:   PetscScalar     *work,lwork;
3139:   PetscScalar     *St,*S,*eigv;
3140:   PetscScalar     *Sarray,*Starray;
3141:   PetscReal       *eigs,thresh,lthresh,uthresh;
3142:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3143:   PetscBool       allocated_S_St;
3144: #if defined(PETSC_USE_COMPLEX)
3145:   PetscReal       *rwork;
3146: #endif
3147:   PetscErrorCode  ierr;

3150:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3151:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3152:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3153:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);

3155:   if (pcbddc->dbg_flag) {
3156:     PetscViewerFlush(pcbddc->dbg_viewer);
3157:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3158:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3159:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3160:   }

3162:   if (pcbddc->dbg_flag) {
3163:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3164:   }

3166:   /* max size of subsets */
3167:   mss = 0;
3168:   for (i=0;i<sub_schurs->n_subs;i++) {
3169:     PetscInt subset_size;

3171:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3172:     mss = PetscMax(mss,subset_size);
3173:   }

3175:   /* min/max and threshold */
3176:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3177:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3178:   nmax = PetscMax(nmin,nmax);
3179:   allocated_S_St = PETSC_FALSE;
3180:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3181:     allocated_S_St = PETSC_TRUE;
3182:   }

3184:   /* allocate lapack workspace */
3185:   cum = cum2 = 0;
3186:   maxneigs = 0;
3187:   for (i=0;i<sub_schurs->n_subs;i++) {
3188:     PetscInt n,subset_size;

3190:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3191:     n = PetscMin(subset_size,nmax);
3192:     cum += subset_size;
3193:     cum2 += subset_size*n;
3194:     maxneigs = PetscMax(maxneigs,n);
3195:   }
3196:   lwork = 0;
3197:   if (mss) {
3198:     if (sub_schurs->is_symmetric) {
3199:       PetscScalar  sdummy = 0.;
3200:       PetscBLASInt B_itype = 1;
3201:       PetscBLASInt B_N = mss, idummy = 0;
3202:       PetscReal    rdummy = 0.,zero = 0.0;
3203:       PetscReal    eps = 0.0; /* dlamch? */

3205:       B_lwork = -1;
3206:       /* some implementations may complain about NULL pointers, even if we are querying */
3207:       S = &sdummy;
3208:       St = &sdummy;
3209:       eigs = &rdummy;
3210:       eigv = &sdummy;
3211:       B_iwork = &idummy;
3212:       B_ifail = &idummy;
3213: #if defined(PETSC_USE_COMPLEX)
3214:       rwork = &rdummy;
3215: #endif
3216:       thresh = 1.0;
3217:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3218: #if defined(PETSC_USE_COMPLEX)
3219:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3220: #else
3221:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3222: #endif
3223:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3224:       PetscFPTrapPop();
3225:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3226:   }

3228:   nv = 0;
3229:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3230:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3231:   }
3232:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3233:   if (allocated_S_St) {
3234:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3235:   }
3236:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3237: #if defined(PETSC_USE_COMPLEX)
3238:   PetscMalloc1(7*mss,&rwork);
3239: #endif
3240:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3241:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3242:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3243:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3244:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3245:   PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);

3247:   maxneigs = 0;
3248:   cum = cumarray = 0;
3249:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3250:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3251:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3252:     const PetscInt *idxs;

3254:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3255:     for (cum=0;cum<nv;cum++) {
3256:       pcbddc->adaptive_constraints_n[cum] = 1;
3257:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3258:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3259:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3260:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3261:     }
3262:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3263:   }

3265:   if (mss) { /* multilevel */
3266:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3267:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3268:   }

3270:   lthresh = pcbddc->adaptive_threshold[0];
3271:   uthresh = pcbddc->adaptive_threshold[1];
3272:   for (i=0;i<sub_schurs->n_subs;i++) {
3273:     const PetscInt *idxs;
3274:     PetscReal      upper,lower;
3275:     PetscInt       j,subset_size,eigs_start = 0;
3276:     PetscBLASInt   B_N;
3277:     PetscBool      same_data = PETSC_FALSE;
3278:     PetscBool      scal = PETSC_FALSE;

3280:     if (pcbddc->use_deluxe_scaling) {
3281:       upper = PETSC_MAX_REAL;
3282:       lower = uthresh;
3283:     } else {
3284:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3285:       upper = 1./uthresh;
3286:       lower = 0.;
3287:     }
3288:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3289:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3290:     PetscBLASIntCast(subset_size,&B_N);
3291:     /* this is experimental: we assume the dofs have been properly grouped to have
3292:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3293:     if (!sub_schurs->is_posdef) {
3294:       Mat T;

3296:       for (j=0;j<subset_size;j++) {
3297:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3298:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3299:           MatScale(T,-1.0);
3300:           MatDestroy(&T);
3301:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3302:           MatScale(T,-1.0);
3303:           MatDestroy(&T);
3304:           if (sub_schurs->change_primal_sub) {
3305:             PetscInt       nz,k;
3306:             const PetscInt *idxs;

3308:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3309:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3310:             for (k=0;k<nz;k++) {
3311:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3312:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3313:             }
3314:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3315:           }
3316:           scal = PETSC_TRUE;
3317:           break;
3318:         }
3319:       }
3320:     }

3322:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3323:       if (sub_schurs->is_symmetric) {
3324:         PetscInt j,k;
3325:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3326:           PetscArrayzero(S,subset_size*subset_size);
3327:           PetscArrayzero(St,subset_size*subset_size);
3328:         }
3329:         for (j=0;j<subset_size;j++) {
3330:           for (k=j;k<subset_size;k++) {
3331:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3332:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3333:           }
3334:         }
3335:       } else {
3336:         PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3337:         PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3338:       }
3339:     } else {
3340:       S = Sarray + cumarray;
3341:       St = Starray + cumarray;
3342:     }
3343:     /* see if we can save some work */
3344:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3345:       PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3346:     }

3348:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3349:       B_neigs = 0;
3350:     } else {
3351:       if (sub_schurs->is_symmetric) {
3352:         PetscBLASInt B_itype = 1;
3353:         PetscBLASInt B_IL, B_IU;
3354:         PetscReal    eps = -1.0; /* dlamch? */
3355:         PetscInt     nmin_s;
3356:         PetscBool    compute_range;

3358:         B_neigs = 0;
3359:         compute_range = (PetscBool)!same_data;
3360:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3362:         if (pcbddc->dbg_flag) {
3363:           PetscInt nc = 0;

3365:           if (sub_schurs->change_primal_sub) {
3366:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3367:           }
3368:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3369:         }

3371:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3372:         if (compute_range) {

3374:           /* ask for eigenvalues larger than thresh */
3375:           if (sub_schurs->is_posdef) {
3376: #if defined(PETSC_USE_COMPLEX)
3377:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3378: #else
3379:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3380: #endif
3381:             PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3382:           } else { /* no theory so far, but it works nicely */
3383:             PetscInt  recipe = 0,recipe_m = 1;
3384:             PetscReal bb[2];

3386:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3387:             switch (recipe) {
3388:             case 0:
3389:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3390:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3391: #if defined(PETSC_USE_COMPLEX)
3392:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3393: #else
3394:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3395: #endif
3396:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3397:               break;
3398:             case 1:
3399:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3400: #if defined(PETSC_USE_COMPLEX)
3401:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3402: #else
3403:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3404: #endif
3405:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3406:               if (!scal) {
3407:                 PetscBLASInt B_neigs2 = 0;

3409:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3410:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3411:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3412: #if defined(PETSC_USE_COMPLEX)
3413:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3414: #else
3415:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3416: #endif
3417:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3418:                 B_neigs += B_neigs2;
3419:               }
3420:               break;
3421:             case 2:
3422:               if (scal) {
3423:                 bb[0] = PETSC_MIN_REAL;
3424:                 bb[1] = 0;
3425: #if defined(PETSC_USE_COMPLEX)
3426:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3427: #else
3428:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3429: #endif
3430:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3431:               } else {
3432:                 PetscBLASInt B_neigs2 = 0;
3433:                 PetscBool    import = PETSC_FALSE;

3435:                 lthresh = PetscMax(lthresh,0.0);
3436:                 if (lthresh > 0.0) {
3437:                   bb[0] = PETSC_MIN_REAL;
3438:                   bb[1] = lthresh*lthresh;

3440:                   import = PETSC_TRUE;
3441: #if defined(PETSC_USE_COMPLEX)
3442:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3443: #else
3444:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3445: #endif
3446:                   PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3447:                 }
3448:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3449:                 bb[1] = PETSC_MAX_REAL;
3450:                 if (import) {
3451:                   PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3452:                   PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3453:                 }
3454: #if defined(PETSC_USE_COMPLEX)
3455:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3456: #else
3457:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3458: #endif
3459:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3460:                 B_neigs += B_neigs2;
3461:               }
3462:               break;
3463:             case 3:
3464:               if (scal) {
3465:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3466:               } else {
3467:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3468:               }
3469:               if (!scal) {
3470:                 bb[0] = uthresh;
3471:                 bb[1] = PETSC_MAX_REAL;
3472: #if defined(PETSC_USE_COMPLEX)
3473:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3474: #else
3475:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3476: #endif
3477:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3478:               }
3479:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3480:                 PetscBLASInt B_neigs2 = 0;

3482:                 B_IL = 1;
3483:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3484:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3485:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3486: #if defined(PETSC_USE_COMPLEX)
3487:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3488: #else
3489:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3490: #endif
3491:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3492:                 B_neigs += B_neigs2;
3493:               }
3494:               break;
3495:             case 4:
3496:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3497: #if defined(PETSC_USE_COMPLEX)
3498:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3499: #else
3500:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3501: #endif
3502:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3503:               {
3504:                 PetscBLASInt B_neigs2 = 0;

3506:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3507:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3508:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3509: #if defined(PETSC_USE_COMPLEX)
3510:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3511: #else
3512:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3513: #endif
3514:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3515:                 B_neigs += B_neigs2;
3516:               }
3517:               break;
3518:             case 5: /* same as before: first compute all eigenvalues, then filter */
3519: #if defined(PETSC_USE_COMPLEX)
3520:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3521: #else
3522:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3523: #endif
3524:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3525:               {
3526:                 PetscInt e,k,ne;
3527:                 for (e=0,ne=0;e<B_neigs;e++) {
3528:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3529:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3530:                     eigs[ne] = eigs[e];
3531:                     ne++;
3532:                   }
3533:                 }
3534:                 PetscArraycpy(eigv,S,B_N*ne);
3535:                 B_neigs = ne;
3536:               }
3537:               break;
3538:             default:
3539:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3540:             }
3541:           }
3542:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3543:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3544:           B_IL = 1;
3545: #if defined(PETSC_USE_COMPLEX)
3546:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3547: #else
3548:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3549: #endif
3550:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3551:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3552:           PetscInt k;
3553:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3554:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3555:           PetscBLASIntCast(nmax,&B_neigs);
3556:           nmin = nmax;
3557:           PetscArrayzero(eigv,subset_size*nmax);
3558:           for (k=0;k<nmax;k++) {
3559:             eigs[k] = 1./PETSC_SMALL;
3560:             eigv[k*(subset_size+1)] = 1.0;
3561:           }
3562:         }
3563:         PetscFPTrapPop();
3564:         if (B_ierr) {
3565:           if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3566:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3567:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3568:         }

3570:         if (B_neigs > nmax) {
3571:           if (pcbddc->dbg_flag) {
3572:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3573:           }
3574:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3575:           B_neigs = nmax;
3576:         }

3578:         nmin_s = PetscMin(nmin,B_N);
3579:         if (B_neigs < nmin_s) {
3580:           PetscBLASInt B_neigs2 = 0;

3582:           if (pcbddc->use_deluxe_scaling) {
3583:             if (scal) {
3584:               B_IU = nmin_s;
3585:               B_IL = B_neigs + 1;
3586:             } else {
3587:               B_IL = B_N - nmin_s + 1;
3588:               B_IU = B_N - B_neigs;
3589:             }
3590:           } else {
3591:             B_IL = B_neigs + 1;
3592:             B_IU = nmin_s;
3593:           }
3594:           if (pcbddc->dbg_flag) {
3595:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3596:           }
3597:           if (sub_schurs->is_symmetric) {
3598:             PetscInt j,k;
3599:             for (j=0;j<subset_size;j++) {
3600:               for (k=j;k<subset_size;k++) {
3601:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3602:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3603:               }
3604:             }
3605:           } else {
3606:             PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3607:             PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3608:           }
3609:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3610: #if defined(PETSC_USE_COMPLEX)
3611:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3612: #else
3613:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3614: #endif
3615:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3616:           PetscFPTrapPop();
3617:           B_neigs += B_neigs2;
3618:         }
3619:         if (B_ierr) {
3620:           if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3621:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3622:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3623:         }
3624:         if (pcbddc->dbg_flag) {
3625:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3626:           for (j=0;j<B_neigs;j++) {
3627:             if (eigs[j] == 0.0) {
3628:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3629:             } else {
3630:               if (pcbddc->use_deluxe_scaling) {
3631:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3632:               } else {
3633:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3634:               }
3635:             }
3636:           }
3637:         }
3638:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3639:     }
3640:     /* change the basis back to the original one */
3641:     if (sub_schurs->change) {
3642:       Mat change,phi,phit;

3644:       if (pcbddc->dbg_flag > 2) {
3645:         PetscInt ii;
3646:         for (ii=0;ii<B_neigs;ii++) {
3647:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3648:           for (j=0;j<B_N;j++) {
3649: #if defined(PETSC_USE_COMPLEX)
3650:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3651:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3652:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3653: #else
3654:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3655: #endif
3656:           }
3657:         }
3658:       }
3659:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3660:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3661:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3662:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3663:       MatDestroy(&phit);
3664:       MatDestroy(&phi);
3665:     }
3666:     maxneigs = PetscMax(B_neigs,maxneigs);
3667:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3668:     if (B_neigs) {
3669:       PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);

3671:       if (pcbddc->dbg_flag > 1) {
3672:         PetscInt ii;
3673:         for (ii=0;ii<B_neigs;ii++) {
3674:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3675:           for (j=0;j<B_N;j++) {
3676: #if defined(PETSC_USE_COMPLEX)
3677:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3678:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3679:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3680: #else
3681:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3682: #endif
3683:           }
3684:         }
3685:       }
3686:       PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3687:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3688:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3689:       cum++;
3690:     }
3691:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3692:     /* shift for next computation */
3693:     cumarray += subset_size*subset_size;
3694:   }
3695:   if (pcbddc->dbg_flag) {
3696:     PetscViewerFlush(pcbddc->dbg_viewer);
3697:   }

3699:   if (mss) {
3700:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3701:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3702:     /* destroy matrices (junk) */
3703:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3704:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3705:   }
3706:   if (allocated_S_St) {
3707:     PetscFree2(S,St);
3708:   }
3709:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3710: #if defined(PETSC_USE_COMPLEX)
3711:   PetscFree(rwork);
3712: #endif
3713:   if (pcbddc->dbg_flag) {
3714:     PetscInt maxneigs_r;
3715:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3716:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3717:   }
3718:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3719:   return(0);
3720: }

3722: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3723: {
3724:   PetscScalar    *coarse_submat_vals;

3728:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3729:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3730:   PCBDDCSetUpLocalScatters(pc);

3732:   /* Setup local neumann solver ksp_R */
3733:   /* PCBDDCSetUpLocalScatters should be called first! */
3734:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3736:   /*
3737:      Setup local correction and local part of coarse basis.
3738:      Gives back the dense local part of the coarse matrix in column major ordering
3739:   */
3740:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3742:   /* Compute total number of coarse nodes and setup coarse solver */
3743:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3745:   /* free */
3746:   PetscFree(coarse_submat_vals);
3747:   return(0);
3748: }

3750: PetscErrorCode PCBDDCResetCustomization(PC pc)
3751: {
3752:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3756:   ISDestroy(&pcbddc->user_primal_vertices);
3757:   ISDestroy(&pcbddc->user_primal_vertices_local);
3758:   ISDestroy(&pcbddc->NeumannBoundaries);
3759:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3760:   ISDestroy(&pcbddc->DirichletBoundaries);
3761:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3762:   PetscFree(pcbddc->onearnullvecs_state);
3763:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3764:   PCBDDCSetDofsSplitting(pc,0,NULL);
3765:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3766:   return(0);
3767: }

3769: PetscErrorCode PCBDDCResetTopography(PC pc)
3770: {
3771:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3772:   PetscInt       i;

3776:   MatDestroy(&pcbddc->nedcG);
3777:   ISDestroy(&pcbddc->nedclocal);
3778:   MatDestroy(&pcbddc->discretegradient);
3779:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3780:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3781:   MatDestroy(&pcbddc->switch_static_change);
3782:   VecDestroy(&pcbddc->work_change);
3783:   MatDestroy(&pcbddc->ConstraintMatrix);
3784:   MatDestroy(&pcbddc->divudotp);
3785:   ISDestroy(&pcbddc->divudotp_vl2l);
3786:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3787:   for (i=0;i<pcbddc->n_local_subs;i++) {
3788:     ISDestroy(&pcbddc->local_subs[i]);
3789:   }
3790:   pcbddc->n_local_subs = 0;
3791:   PetscFree(pcbddc->local_subs);
3792:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3793:   pcbddc->graphanalyzed        = PETSC_FALSE;
3794:   pcbddc->recompute_topography = PETSC_TRUE;
3795:   pcbddc->corner_selected      = PETSC_FALSE;
3796:   return(0);
3797: }

/*
   PCBDDCResetSolvers - Releases the solver-related objects cached in the PC_BDDC
   context reached through pc->data.

   Input Parameter:
.  pc - the preconditioner; pc->data is cast to PC_BDDC*

   Notes:
   Vectors, matrices, index sets, scatters and the PetscSF are released through
   their *Destroy routines; raw index/value arrays through PetscFree/PetscFree2/
   PetscFree3. The three KSP objects (ksp_D, ksp_R, coarse_ksp) are only reset
   with KSPReset, they are not destroyed here.

   As written in this listing the function returns 0 unconditionally.
*/
3799: PetscErrorCode PCBDDCResetSolvers(PC pc)
3800: {
3801:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3805:   VecDestroy(&pcbddc->coarse_vec);
        /* PCBDDCSetUpCorrection builds coarse_phi_B with MatCreateSeqDense on top of a
           buffer it allocates itself with PetscCalloc1 (the same buffer also backs
           coarse_phi_D and, in the non-symmetric case, coarse_psi_B/coarse_psi_D), so the
           buffer is freed by hand here before the matrices are destroyed.
           NOTE(review): presumably MatDestroy does not free a user-provided dense array -
           confirm against the MatCreateSeqDense documentation.
           NOTE(review): the MatDenseGetArray below has no matching MatDenseRestoreArray. */
3806:   if (pcbddc->coarse_phi_B) {
3807:     PetscScalar *array;
3808:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3809:     PetscFree(array);
3810:   }
        /* coarse basis matrices (boundary and interior parts) */
3811:   MatDestroy(&pcbddc->coarse_phi_B);
3812:   MatDestroy(&pcbddc->coarse_phi_D);
3813:   MatDestroy(&pcbddc->coarse_psi_B);
3814:   MatDestroy(&pcbddc->coarse_psi_D);
        /* local work vectors and auxiliary matrices (see PCBDDCSetUpLocalWorkVectors
           and PCBDDCSetUpCorrection for where they are created) */
3815:   VecDestroy(&pcbddc->vec1_P);
3816:   VecDestroy(&pcbddc->vec1_C);
3817:   MatDestroy(&pcbddc->local_auxmat2);
3818:   MatDestroy(&pcbddc->local_auxmat1);
3819:   VecDestroy(&pcbddc->vec1_R);
3820:   VecDestroy(&pcbddc->vec2_R);
        /* index set and scatters tied to the R numbering, plus the coarse scatter */
3821:   ISDestroy(&pcbddc->is_R_local);
3822:   VecScatterDestroy(&pcbddc->R_to_B);
3823:   VecScatterDestroy(&pcbddc->R_to_D);
3824:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
        /* the KSP objects themselves are kept; only their state is reset */
3825:   KSPReset(pcbddc->ksp_D);
3826:   KSPReset(pcbddc->ksp_R);
3827:   KSPReset(pcbddc->coarse_ksp);
3828:   MatDestroy(&pcbddc->local_mat);
        /* primal index bookkeeping; local_primal_ref_node and local_primal_ref_mult
           are released together with PetscFree2 */
3829:   PetscFree(pcbddc->primal_indices_local_idxs);
3830:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3831:   PetscFree(pcbddc->global_primal_indices);
3832:   ISDestroy(&pcbddc->coarse_subassembling);
        /* data attached to the "benign" fields of the context */
3833:   MatDestroy(&pcbddc->benign_change);
3834:   VecDestroy(&pcbddc->benign_vec);
        /* PCBDDCBenignShellMat is defined elsewhere; it is invoked with PETSC_TRUE
           before benign_B0 is destroyed.
           NOTE(review): presumably the flag asks it to undo/restore - confirm. */
3835:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3836:   MatDestroy(&pcbddc->benign_B0);
3837:   PetscSFDestroy(&pcbddc->benign_sf);
        /* destroy each of the benign_n index sets, then the array holding them */
3838:   if (pcbddc->benign_zerodiag_subs) {
3839:     PetscInt i;
3840:     for (i=0;i<pcbddc->benign_n;i++) {
3841:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3842:     }
3843:     PetscFree(pcbddc->benign_zerodiag_subs);
3844:   }
        /* the three benign_p0* arrays are released together with PetscFree3 */
3845:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3846:   return(0);
3847: }

/*
   PCBDDCSetUpLocalWorkVectors - Creates (or re-creates) the local work vectors
   vec1_R, vec2_R, vec1_P and vec1_C held in the PC_BDDC context.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_BDDC* and as PC_IS*

   Notes:
   Each vector is rebuilt only when its current size (VecGetSize) differs from
   the size required now; otherwise the existing vector is kept. New vectors
   use the communicator and the vector type of pcis->vec1_N.

   Required sizes:
     vec1_R, vec2_R : n_R           = pcis->n - pcbddc->n_vertices
     vec1_P         : pcbddc->local_primal_size
     vec1_C         : n_constraints = local_primal_size - benign_n - n_vertices

   As written in this listing the function returns 0 unconditionally.
*/
3849: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3850: {
        /* both views are taken from the same pc->data pointer
           NOTE(review): presumably PC_BDDC embeds PC_IS as its leading member - confirm */
3851:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3852:   PC_IS          *pcis = (PC_IS*)pc->data;
3853:   VecType        impVecType;
3854:   PetscInt       n_constraints,n_R,old_size;

3858:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3859:   n_R = pcis->n - pcbddc->n_vertices;
3860:   VecGetType(pcis->vec1_N,&impVecType);
3861:   /* local work vectors (try to avoid unneeded work)*/
3862:   /* R nodes */
        /* old_size = -1 is the "no vector yet" sentinel used for all three groups below */
3863:   old_size = -1;
3864:   if (pcbddc->vec1_R) {
3865:     VecGetSize(pcbddc->vec1_R,&old_size);
3866:   }
3867:   if (n_R != old_size) {
          /* vec1_R and vec2_R are always replaced as a pair; vec2_R is a duplicate of vec1_R */
3868:     VecDestroy(&pcbddc->vec1_R);
3869:     VecDestroy(&pcbddc->vec2_R);
3870:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3871:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3872:     VecSetType(pcbddc->vec1_R,impVecType);
3873:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3874:   }
3875:   /* local primal dofs */
3876:   old_size = -1;
3877:   if (pcbddc->vec1_P) {
3878:     VecGetSize(pcbddc->vec1_P,&old_size);
3879:   }
3880:   if (pcbddc->local_primal_size != old_size) {
3881:     VecDestroy(&pcbddc->vec1_P);
3882:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3883:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3884:     VecSetType(pcbddc->vec1_P,impVecType);
3885:   }
3886:   /* local explicit constraints */
3887:   old_size = -1;
3888:   if (pcbddc->vec1_C) {
3889:     VecGetSize(pcbddc->vec1_C,&old_size);
3890:   }
        /* unlike the two groups above, nothing is done when n_constraints is zero:
           an already existing vec1_C is then left untouched (neither destroyed nor resized) */
3891:   if (n_constraints && n_constraints != old_size) {
3892:     VecDestroy(&pcbddc->vec1_C);
3893:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3894:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3895:     VecSetType(pcbddc->vec1_C,impVecType);
3896:   }
3897:   return(0);
3898: }

3900: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3901: {
3902:   PetscErrorCode  ierr;
3903:   /* pointers to pcis and pcbddc */
3904:   PC_IS*          pcis = (PC_IS*)pc->data;
3905:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3906:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3907:   /* submatrices of local problem */
3908:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3909:   /* submatrices of local coarse problem */
3910:   Mat             S_VV,S_CV,S_VC,S_CC;
3911:   /* working matrices */
3912:   Mat             C_CR;
3913:   /* additional working stuff */
3914:   PC              pc_R;
3915:   Mat             F,Brhs = NULL;
3916:   Vec             dummy_vec;
3917:   PetscBool       isLU,isCHOL,need_benign_correction,sparserhs;
3918:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3919:   PetscScalar     *work;
3920:   PetscInt        *idx_V_B;
3921:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3922:   PetscInt        i,n_R,n_D,n_B;
3923:   PetscScalar     one=1.0,m_one=-1.0;

3926:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3927:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

3929:   /* Set Non-overlapping dimensions */
3930:   n_vertices = pcbddc->n_vertices;
3931:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3932:   n_B = pcis->n_B;
3933:   n_D = pcis->n - n_B;
3934:   n_R = pcis->n - n_vertices;

3936:   /* vertices in boundary numbering */
3937:   PetscMalloc1(n_vertices,&idx_V_B);
3938:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3939:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);

3941:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3942:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3943:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3944:   MatDenseSetLDA(S_VV,pcbddc->local_primal_size);
3945:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3946:   MatDenseSetLDA(S_CV,pcbddc->local_primal_size);
3947:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3948:   MatDenseSetLDA(S_VC,pcbddc->local_primal_size);
3949:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3950:   MatDenseSetLDA(S_CC,pcbddc->local_primal_size);

3952:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3953:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3954:   PCSetUp(pc_R);
3955:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3956:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3957:   lda_rhs = n_R;
3958:   need_benign_correction = PETSC_FALSE;
3959:   if (isLU || isCHOL) {
3960:     PCFactorGetMatrix(pc_R,&F);
3961:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3962:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3963:     MatFactorType      type;

3965:     F = reuse_solver->F;
3966:     MatGetFactorType(F,&type);
3967:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3968:     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3969:     MatGetSize(F,&lda_rhs,NULL);
3970:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3971:   } else F = NULL;

3973:   /* determine if we can use a sparse right-hand side */
3974:   sparserhs = PETSC_FALSE;
3975:   if (F) {
3976:     MatSolverType solver;

3978:     MatFactorGetSolverType(F,&solver);
3979:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3980:   }

3982:   /* allocate workspace */
3983:   n = 0;
3984:   if (n_constraints) {
3985:     n += lda_rhs*n_constraints;
3986:   }
3987:   if (n_vertices) {
3988:     n = PetscMax(2*lda_rhs*n_vertices,n);
3989:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3990:   }
3991:   if (!pcbddc->symmetric_primal) {
3992:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3993:   }
3994:   PetscMalloc1(n,&work);

3996:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3997:   dummy_vec = NULL;
3998:   if (need_benign_correction && lda_rhs != n_R && F) {
3999:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
4000:     VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
4001:     VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
4002:   }

4004:   MatDestroy(&pcbddc->local_auxmat1);
4005:   MatDestroy(&pcbddc->local_auxmat2);

4007:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4008:   if (n_constraints) {
4009:     Mat         M3,C_B;
4010:     IS          is_aux;
4011:     PetscScalar *array,*array2;

4013:     /* Extract constraints on R nodes: C_{CR}  */
4014:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4015:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4016:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

4018:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4019:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4020:     if (!sparserhs) {
4021:       PetscArrayzero(work,lda_rhs*n_constraints);
4022:       for (i=0;i<n_constraints;i++) {
4023:         const PetscScalar *row_cmat_values;
4024:         const PetscInt    *row_cmat_indices;
4025:         PetscInt          size_of_constraint,j;

4027:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4028:         for (j=0;j<size_of_constraint;j++) {
4029:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4030:         }
4031:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4032:       }
4033:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4034:     } else {
4035:       Mat tC_CR;

4037:       MatScale(C_CR,-1.0);
4038:       if (lda_rhs != n_R) {
4039:         PetscScalar *aa;
4040:         PetscInt    r,*ii,*jj;
4041:         PetscBool   done;

4043:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4044:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4045:         MatSeqAIJGetArray(C_CR,&aa);
4046:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4047:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4048:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4049:       } else {
4050:         PetscObjectReference((PetscObject)C_CR);
4051:         tC_CR = C_CR;
4052:       }
4053:       MatCreateTranspose(tC_CR,&Brhs);
4054:       MatDestroy(&tC_CR);
4055:     }
4056:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4057:     if (F) {
4058:       if (need_benign_correction) {
4059:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4061:         /* rhs is already zero on interior dofs, no need to change the rhs */
4062:         PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4063:       }
4064:       MatMatSolve(F,Brhs,local_auxmat2_R);
4065:       if (need_benign_correction) {
4066:         PetscScalar        *marr;
4067:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4069:         MatDenseGetArray(local_auxmat2_R,&marr);
4070:         if (lda_rhs != n_R) {
4071:           for (i=0;i<n_constraints;i++) {
4072:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4073:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4074:             VecResetArray(dummy_vec);
4075:           }
4076:         } else {
4077:           for (i=0;i<n_constraints;i++) {
4078:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4079:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4080:             VecResetArray(pcbddc->vec1_R);
4081:           }
4082:         }
4083:         MatDenseRestoreArray(local_auxmat2_R,&marr);
4084:       }
4085:     } else {
4086:       PetscScalar *marr;

4088:       MatDenseGetArray(local_auxmat2_R,&marr);
4089:       for (i=0;i<n_constraints;i++) {
4090:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4091:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4092:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4093:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4094:         VecResetArray(pcbddc->vec1_R);
4095:         VecResetArray(pcbddc->vec2_R);
4096:       }
4097:       MatDenseRestoreArray(local_auxmat2_R,&marr);
4098:     }
4099:     if (sparserhs) {
4100:       MatScale(C_CR,-1.0);
4101:     }
4102:     MatDestroy(&Brhs);
4103:     if (!pcbddc->switch_static) {
4104:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4105:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
4106:       MatDenseGetArray(local_auxmat2_R,&array2);
4107:       for (i=0;i<n_constraints;i++) {
4108:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4109:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
4110:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4111:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4112:         VecResetArray(pcis->vec1_B);
4113:         VecResetArray(pcbddc->vec1_R);
4114:       }
4115:       MatDenseRestoreArray(local_auxmat2_R,&array2);
4116:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4117:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4118:     } else {
4119:       if (lda_rhs != n_R) {
4120:         IS dummy;

4122:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4123:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4124:         ISDestroy(&dummy);
4125:       } else {
4126:         PetscObjectReference((PetscObject)local_auxmat2_R);
4127:         pcbddc->local_auxmat2 = local_auxmat2_R;
4128:       }
4129:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4130:     }
4131:     ISDestroy(&is_aux);
4132:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4133:     MatScale(M3,m_one);
4134:     if (isCHOL) {
4135:       MatCholeskyFactor(M3,NULL,NULL);
4136:     } else {
4137:       MatLUFactor(M3,NULL,NULL,NULL);
4138:     }
4139:     MatSeqDenseInvertFactors_Private(M3);
4140:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4141:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4142:     MatDestroy(&C_B);
4143:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4144:     MatDestroy(&M3);
4145:   }

4147:   /* Get submatrices from subdomain matrix */
4148:   if (n_vertices) {
4149: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4150:     PetscBool oldpin;
4151: #endif
4152:     PetscBool isaij;
4153:     IS        is_aux;

4155:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4156:       IS tis;

4158:       ISDuplicate(pcbddc->is_R_local,&tis);
4159:       ISSort(tis);
4160:       ISComplement(tis,0,pcis->n,&is_aux);
4161:       ISDestroy(&tis);
4162:     } else {
4163:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4164:     }
4165: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4166:     oldpin = pcbddc->local_mat->boundtocpu;
4167: #endif
4168:     MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4169:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4170:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4171:     PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4172:     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4173:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4174:     }
4175:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4176: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4177:     MatBindToCPU(pcbddc->local_mat,oldpin);
4178: #endif
4179:     ISDestroy(&is_aux);
4180:   }

4182:   /* Matrix of coarse basis functions (local) */
4183:   if (pcbddc->coarse_phi_B) {
4184:     PetscInt on_B,on_primal,on_D=n_D;
4185:     if (pcbddc->coarse_phi_D) {
4186:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4187:     }
4188:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4189:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4190:       PetscScalar *marray;

4192:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4193:       PetscFree(marray);
4194:       MatDestroy(&pcbddc->coarse_phi_B);
4195:       MatDestroy(&pcbddc->coarse_psi_B);
4196:       MatDestroy(&pcbddc->coarse_phi_D);
4197:       MatDestroy(&pcbddc->coarse_psi_D);
4198:     }
4199:   }

4201:   if (!pcbddc->coarse_phi_B) {
4202:     PetscScalar *marr;

4204:     /* memory size */
4205:     n = n_B*pcbddc->local_primal_size;
4206:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4207:     if (!pcbddc->symmetric_primal) n *= 2;
4208:     PetscCalloc1(n,&marr);
4209:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4210:     marr += n_B*pcbddc->local_primal_size;
4211:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4212:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4213:       marr += n_D*pcbddc->local_primal_size;
4214:     }
4215:     if (!pcbddc->symmetric_primal) {
4216:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4217:       marr += n_B*pcbddc->local_primal_size;
4218:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4219:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4220:       }
4221:     } else {
4222:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4223:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4224:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4225:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4226:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4227:       }
4228:     }
4229:   }

4231:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4232:   p0_lidx_I = NULL;
4233:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4234:     const PetscInt *idxs;

4236:     ISGetIndices(pcis->is_I_local,&idxs);
4237:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4238:     for (i=0;i<pcbddc->benign_n;i++) {
4239:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4240:     }
4241:     ISRestoreIndices(pcis->is_I_local,&idxs);
4242:   }

4244:   /* vertices */
4245:   if (n_vertices) {
4246:     PetscBool restoreavr = PETSC_FALSE;

4248:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4250:     if (n_R) {
4251:       Mat               A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4252:       PetscBLASInt      B_N,B_one = 1;
4253:       const PetscScalar *x;
4254:       PetscScalar       *y;

4256:       MatScale(A_RV,m_one);
4257:       if (need_benign_correction) {
4258:         ISLocalToGlobalMapping RtoN;
4259:         IS                     is_p0;
4260:         PetscInt               *idxs_p0,n;

4262:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4263:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4264:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4265:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4266:         ISLocalToGlobalMappingDestroy(&RtoN);
4267:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4268:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4269:         ISDestroy(&is_p0);
4270:       }

4272:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4273:       if (!sparserhs || need_benign_correction) {
4274:         if (lda_rhs == n_R) {
4275:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4276:         } else {
4277:           PetscScalar    *av,*array;
4278:           const PetscInt *xadj,*adjncy;
4279:           PetscInt       n;
4280:           PetscBool      flg_row;

4282:           array = work+lda_rhs*n_vertices;
4283:           PetscArrayzero(array,lda_rhs*n_vertices);
4284:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4285:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4286:           MatSeqAIJGetArray(A_RV,&av);
4287:           for (i=0;i<n;i++) {
4288:             PetscInt j;
4289:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4290:           }
4291:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4292:           MatDestroy(&A_RV);
4293:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4294:         }
4295:         if (need_benign_correction) {
4296:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4297:           PetscScalar        *marr;

4299:           MatDenseGetArray(A_RV,&marr);
4300:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4302:                  | 0 0  0 | (V)
4303:              L = | 0 0 -1 | (P-p0)
4304:                  | 0 0 -1 | (p0)

4306:           */
4307:           for (i=0;i<reuse_solver->benign_n;i++) {
4308:             const PetscScalar *vals;
4309:             const PetscInt    *idxs,*idxs_zero;
4310:             PetscInt          n,j,nz;

4312:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4313:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4314:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4315:             for (j=0;j<n;j++) {
4316:               PetscScalar val = vals[j];
4317:               PetscInt    k,col = idxs[j];
4318:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4319:             }
4320:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4321:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4322:           }
4323:           MatDenseRestoreArray(A_RV,&marr);
4324:         }
4325:         PetscObjectReference((PetscObject)A_RV);
4326:         Brhs = A_RV;
4327:       } else {
4328:         Mat tA_RVT,A_RVT;

4330:         if (!pcbddc->symmetric_primal) {
4331:           /* A_RV already scaled by -1 */
4332:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4333:         } else {
4334:           restoreavr = PETSC_TRUE;
4335:           MatScale(A_VR,-1.0);
4336:           PetscObjectReference((PetscObject)A_VR);
4337:           A_RVT = A_VR;
4338:         }
4339:         if (lda_rhs != n_R) {
4340:           PetscScalar *aa;
4341:           PetscInt    r,*ii,*jj;
4342:           PetscBool   done;

4344:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4345:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4346:           MatSeqAIJGetArray(A_RVT,&aa);
4347:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4348:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4349:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4350:         } else {
4351:           PetscObjectReference((PetscObject)A_RVT);
4352:           tA_RVT = A_RVT;
4353:         }
4354:         MatCreateTranspose(tA_RVT,&Brhs);
4355:         MatDestroy(&tA_RVT);
4356:         MatDestroy(&A_RVT);
4357:       }
4358:       if (F) {
4359:         /* need to correct the rhs */
4360:         if (need_benign_correction) {
4361:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4362:           PetscScalar        *marr;

4364:           MatDenseGetArray(Brhs,&marr);
4365:           if (lda_rhs != n_R) {
4366:             for (i=0;i<n_vertices;i++) {
4367:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4368:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4369:               VecResetArray(dummy_vec);
4370:             }
4371:           } else {
4372:             for (i=0;i<n_vertices;i++) {
4373:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4374:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4375:               VecResetArray(pcbddc->vec1_R);
4376:             }
4377:           }
4378:           MatDenseRestoreArray(Brhs,&marr);
4379:         }
4380:         MatMatSolve(F,Brhs,A_RRmA_RV);
4381:         if (restoreavr) {
4382:           MatScale(A_VR,-1.0);
4383:         }
4384:         /* need to correct the solution */
4385:         if (need_benign_correction) {
4386:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4387:           PetscScalar        *marr;

4389:           MatDenseGetArray(A_RRmA_RV,&marr);
4390:           if (lda_rhs != n_R) {
4391:             for (i=0;i<n_vertices;i++) {
4392:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4393:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4394:               VecResetArray(dummy_vec);
4395:             }
4396:           } else {
4397:             for (i=0;i<n_vertices;i++) {
4398:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4399:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4400:               VecResetArray(pcbddc->vec1_R);
4401:             }
4402:           }
4403:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4404:         }
4405:       } else {
4406:         MatDenseGetArray(Brhs,&y);
4407:         for (i=0;i<n_vertices;i++) {
4408:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4409:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4410:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4411:           KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4412:           VecResetArray(pcbddc->vec1_R);
4413:           VecResetArray(pcbddc->vec2_R);
4414:         }
4415:         MatDenseRestoreArray(Brhs,&y);
4416:       }
4417:       MatDestroy(&A_RV);
4418:       MatDestroy(&Brhs);
4419:       /* S_VV and S_CV */
4420:       if (n_constraints) {
4421:         Mat B;

4423:         PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4424:         for (i=0;i<n_vertices;i++) {
4425:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4426:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4427:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4428:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4429:           VecResetArray(pcis->vec1_B);
4430:           VecResetArray(pcbddc->vec1_R);
4431:         }
4432:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4433:         /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4434:         MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4435:         MatProductSetType(S_CV,MATPRODUCT_AB);
4436:         MatProductSetFromOptions(S_CV);
4437:         MatProductSymbolic(S_CV);
4438:         MatProductNumeric(S_CV);
4439:         MatProductClear(S_CV);

4441:         MatDestroy(&B);
4442:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4443:         /* Reuse B = local_auxmat2_R * S_CV */
4444:         MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4445:         MatProductSetType(B,MATPRODUCT_AB);
4446:         MatProductSetFromOptions(B);
4447:         MatProductSymbolic(B);
4448:         MatProductNumeric(B);

4450:         MatScale(S_CV,m_one);
4451:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4452:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4453:         MatDestroy(&B);
4454:       }
4455:       if (lda_rhs != n_R) {
4456:         MatDestroy(&A_RRmA_RV);
4457:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4458:         MatDenseSetLDA(A_RRmA_RV,lda_rhs);
4459:       }
4460:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4461:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4462:       if (need_benign_correction) {
4463:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4464:         PetscScalar        *marr,*sums;

4466:         PetscMalloc1(n_vertices,&sums);
4467:         MatDenseGetArray(S_VVt,&marr);
4468:         for (i=0;i<reuse_solver->benign_n;i++) {
4469:           const PetscScalar *vals;
4470:           const PetscInt    *idxs,*idxs_zero;
4471:           PetscInt          n,j,nz;

4473:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4474:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4475:           for (j=0;j<n_vertices;j++) {
4476:             PetscInt k;
4477:             sums[j] = 0.;
4478:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4479:           }
4480:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4481:           for (j=0;j<n;j++) {
4482:             PetscScalar val = vals[j];
4483:             PetscInt k;
4484:             for (k=0;k<n_vertices;k++) {
4485:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4486:             }
4487:           }
4488:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4489:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4490:         }
4491:         PetscFree(sums);
4492:         MatDenseRestoreArray(S_VVt,&marr);
4493:         MatDestroy(&A_RV_bcorr);
4494:       }
4495:       MatDestroy(&A_RRmA_RV);
4496:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4497:       MatDenseGetArrayRead(A_VV,&x);
4498:       MatDenseGetArray(S_VVt,&y);
4499:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4500:       MatDenseRestoreArrayRead(A_VV,&x);
4501:       MatDenseRestoreArray(S_VVt,&y);
4502:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4503:       MatDestroy(&S_VVt);
4504:     } else {
4505:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4506:     }
4507:     MatDestroy(&A_VV);

4509:     /* coarse basis functions */
4510:     for (i=0;i<n_vertices;i++) {
4511:       PetscScalar *y;

4513:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4514:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4515:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4516:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4517:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4518:       y[n_B*i+idx_V_B[i]] = 1.0;
4519:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4520:       VecResetArray(pcis->vec1_B);

4522:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4523:         PetscInt j;

4525:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4526:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4527:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4528:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4529:         VecResetArray(pcis->vec1_D);
4530:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4531:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4532:       }
4533:       VecResetArray(pcbddc->vec1_R);
4534:     }
4535:     /* if n_R == 0 the object is not destroyed */
4536:     MatDestroy(&A_RV);
4537:   }
4538:   VecDestroy(&dummy_vec);

4540:   if (n_constraints) {
4541:     Mat B;

4543:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4544:     MatScale(S_CC,m_one);
4545:     MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4546:     MatProductSetType(B,MATPRODUCT_AB);
4547:     MatProductSetFromOptions(B);
4548:     MatProductSymbolic(B);
4549:     MatProductNumeric(B);

4551:     MatScale(S_CC,m_one);
4552:     if (n_vertices) {
4553:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4554:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4555:       } else {
4556:         Mat S_VCt;

4558:         if (lda_rhs != n_R) {
4559:           MatDestroy(&B);
4560:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4561:           MatDenseSetLDA(B,lda_rhs);
4562:         }
4563:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4564:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4565:         MatDestroy(&S_VCt);
4566:       }
4567:     }
4568:     MatDestroy(&B);
4569:     /* coarse basis functions */
4570:     for (i=0;i<n_constraints;i++) {
4571:       PetscScalar *y;

4573:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4574:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4575:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4576:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4577:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4578:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4579:       VecResetArray(pcis->vec1_B);
4580:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4581:         PetscInt j;

4583:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4584:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4585:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4586:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4587:         VecResetArray(pcis->vec1_D);
4588:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4589:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4590:       }
4591:       VecResetArray(pcbddc->vec1_R);
4592:     }
4593:   }
4594:   if (n_constraints) {
4595:     MatDestroy(&local_auxmat2_R);
4596:   }
4597:   PetscFree(p0_lidx_I);

4599:   /* coarse matrix entries relative to B_0 */
4600:   if (pcbddc->benign_n) {
4601:     Mat               B0_B,B0_BPHI;
4602:     IS                is_dummy;
4603:     const PetscScalar *data;
4604:     PetscInt          j;

4606:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4607:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4608:     ISDestroy(&is_dummy);
4609:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4610:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4611:     MatDenseGetArrayRead(B0_BPHI,&data);
4612:     for (j=0;j<pcbddc->benign_n;j++) {
4613:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4614:       for (i=0;i<pcbddc->local_primal_size;i++) {
4615:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4616:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4617:       }
4618:     }
4619:     MatDenseRestoreArrayRead(B0_BPHI,&data);
4620:     MatDestroy(&B0_B);
4621:     MatDestroy(&B0_BPHI);
4622:   }

4624:   /* compute other basis functions for non-symmetric problems */
4625:   if (!pcbddc->symmetric_primal) {
4626:     Mat         B_V=NULL,B_C=NULL;
4627:     PetscScalar *marray;

4629:     if (n_constraints) {
4630:       Mat S_CCT,C_CRT;

4632:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4633:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4634:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4635:       MatDestroy(&S_CCT);
4636:       if (n_vertices) {
4637:         Mat S_VCT;

4639:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4640:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4641:         MatDestroy(&S_VCT);
4642:       }
4643:       MatDestroy(&C_CRT);
4644:     } else {
4645:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4646:     }
4647:     if (n_vertices && n_R) {
4648:       PetscScalar    *av,*marray;
4649:       const PetscInt *xadj,*adjncy;
4650:       PetscInt       n;
4651:       PetscBool      flg_row;

4653:       /* B_V = B_V - A_VR^T */
4654:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4655:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4656:       MatSeqAIJGetArray(A_VR,&av);
4657:       MatDenseGetArray(B_V,&marray);
4658:       for (i=0;i<n;i++) {
4659:         PetscInt j;
4660:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4661:       }
4662:       MatDenseRestoreArray(B_V,&marray);
4663:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4664:       MatDestroy(&A_VR);
4665:     }

4667:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4668:     if (n_vertices) {
4669:       MatDenseGetArray(B_V,&marray);
4670:       for (i=0;i<n_vertices;i++) {
4671:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4672:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4673:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4674:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4675:         VecResetArray(pcbddc->vec1_R);
4676:         VecResetArray(pcbddc->vec2_R);
4677:       }
4678:       MatDenseRestoreArray(B_V,&marray);
4679:     }
4680:     if (B_C) {
4681:       MatDenseGetArray(B_C,&marray);
4682:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4683:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4684:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4685:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4686:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4687:         VecResetArray(pcbddc->vec1_R);
4688:         VecResetArray(pcbddc->vec2_R);
4689:       }
4690:       MatDenseRestoreArray(B_C,&marray);
4691:     }
4692:     /* coarse basis functions */
4693:     for (i=0;i<pcbddc->local_primal_size;i++) {
4694:       PetscScalar *y;

4696:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4697:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4698:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4699:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4700:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4701:       if (i<n_vertices) {
4702:         y[n_B*i+idx_V_B[i]] = 1.0;
4703:       }
4704:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4705:       VecResetArray(pcis->vec1_B);

4707:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4708:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4709:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4710:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4711:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4712:         VecResetArray(pcis->vec1_D);
4713:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4714:       }
4715:       VecResetArray(pcbddc->vec1_R);
4716:     }
4717:     MatDestroy(&B_V);
4718:     MatDestroy(&B_C);
4719:   }

4721:   /* free memory */
4722:   PetscFree(idx_V_B);
4723:   MatDestroy(&S_VV);
4724:   MatDestroy(&S_CV);
4725:   MatDestroy(&S_VC);
4726:   MatDestroy(&S_CC);
4727:   PetscFree(work);
4728:   if (n_vertices) {
4729:     MatDestroy(&A_VR);
4730:   }
4731:   if (n_constraints) {
4732:     MatDestroy(&C_CR);
4733:   }
4734:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

4736:   /* Checking coarse_sub_mat and coarse basis functions */
4737:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4738:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4739:   if (pcbddc->dbg_flag) {
4740:     Mat         coarse_sub_mat;
4741:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4742:     Mat         coarse_phi_D,coarse_phi_B;
4743:     Mat         coarse_psi_D,coarse_psi_B;
4744:     Mat         A_II,A_BB,A_IB,A_BI;
4745:     Mat         C_B,CPHI;
4746:     IS          is_dummy;
4747:     Vec         mones;
4748:     MatType     checkmattype=MATSEQAIJ;
4749:     PetscReal   real_value;

4751:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4752:       Mat A;
4753:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4754:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4755:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4756:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4757:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4758:       MatDestroy(&A);
4759:     } else {
4760:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4761:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4762:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4763:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4764:     }
4765:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4766:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4767:     if (!pcbddc->symmetric_primal) {
4768:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4769:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4770:     }
4771:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4773:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4774:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4775:     PetscViewerFlush(pcbddc->dbg_viewer);
4776:     if (!pcbddc->symmetric_primal) {
4777:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4778:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4779:       MatDestroy(&AUXMAT);
4780:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4781:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4782:       MatDestroy(&AUXMAT);
4783:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4784:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4785:       MatDestroy(&AUXMAT);
4786:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4787:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4788:       MatDestroy(&AUXMAT);
4789:     } else {
4790:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4791:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4792:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4793:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4794:       MatDestroy(&AUXMAT);
4795:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4796:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4797:       MatDestroy(&AUXMAT);
4798:     }
4799:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4800:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4801:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4802:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4803:     if (pcbddc->benign_n) {
4804:       Mat               B0_B,B0_BPHI;
4805:       const PetscScalar *data2;
4806:       PetscScalar       *data;
4807:       PetscInt          j;

4809:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4810:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4811:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4812:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4813:       MatDenseGetArray(TM1,&data);
4814:       MatDenseGetArrayRead(B0_BPHI,&data2);
4815:       for (j=0;j<pcbddc->benign_n;j++) {
4816:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4817:         for (i=0;i<pcbddc->local_primal_size;i++) {
4818:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4819:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4820:         }
4821:       }
4822:       MatDenseRestoreArray(TM1,&data);
4823:       MatDenseRestoreArrayRead(B0_BPHI,&data2);
4824:       MatDestroy(&B0_B);
4825:       ISDestroy(&is_dummy);
4826:       MatDestroy(&B0_BPHI);
4827:     }
4828: #if 0
4829:   {
4830:     PetscViewer viewer;
4831:     char filename[256];
4832:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4833:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4834:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4835:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4836:     MatView(coarse_sub_mat,viewer);
4837:     PetscObjectSetName((PetscObject)TM1,"projected");
4838:     MatView(TM1,viewer);
4839:     if (pcbddc->coarse_phi_B) {
4840:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4841:       MatView(pcbddc->coarse_phi_B,viewer);
4842:     }
4843:     if (pcbddc->coarse_phi_D) {
4844:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4845:       MatView(pcbddc->coarse_phi_D,viewer);
4846:     }
4847:     if (pcbddc->coarse_psi_B) {
4848:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4849:       MatView(pcbddc->coarse_psi_B,viewer);
4850:     }
4851:     if (pcbddc->coarse_psi_D) {
4852:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4853:       MatView(pcbddc->coarse_psi_D,viewer);
4854:     }
4855:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4856:     MatView(pcbddc->local_mat,viewer);
4857:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4858:     MatView(pcbddc->ConstraintMatrix,viewer);
4859:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4860:     ISView(pcis->is_I_local,viewer);
4861:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4862:     ISView(pcis->is_B_local,viewer);
4863:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4864:     ISView(pcbddc->is_R_local,viewer);
4865:     PetscViewerDestroy(&viewer);
4866:   }
4867: #endif
4868:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4869:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4870:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4871:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4873:     /* check constraints */
4874:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4875:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4876:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4877:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4878:     } else {
4879:       PetscScalar *data;
4880:       Mat         tmat;
4881:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4882:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4883:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4884:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4885:       MatDestroy(&tmat);
4886:     }
4887:     MatCreateVecs(CPHI,&mones,NULL);
4888:     VecSet(mones,-1.0);
4889:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4890:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4891:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4892:     if (!pcbddc->symmetric_primal) {
4893:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4894:       VecSet(mones,-1.0);
4895:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4896:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4897:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4898:     }
4899:     MatDestroy(&C_B);
4900:     MatDestroy(&CPHI);
4901:     ISDestroy(&is_dummy);
4902:     VecDestroy(&mones);
4903:     PetscViewerFlush(pcbddc->dbg_viewer);
4904:     MatDestroy(&A_II);
4905:     MatDestroy(&A_BB);
4906:     MatDestroy(&A_IB);
4907:     MatDestroy(&A_BI);
4908:     MatDestroy(&TM1);
4909:     MatDestroy(&TM2);
4910:     MatDestroy(&TM3);
4911:     MatDestroy(&TM4);
4912:     MatDestroy(&coarse_phi_D);
4913:     MatDestroy(&coarse_phi_B);
4914:     if (!pcbddc->symmetric_primal) {
4915:       MatDestroy(&coarse_psi_D);
4916:       MatDestroy(&coarse_psi_B);
4917:     }
4918:     MatDestroy(&coarse_sub_mat);
4919:   }
4920:   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors) */
4921:   {
4922:     PetscBool gpu;

4924:     PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4925:     if (gpu) {
4926:       if (pcbddc->local_auxmat1) {
4927:         MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4928:       }
4929:       if (pcbddc->local_auxmat2) {
4930:         MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4931:       }
4932:       if (pcbddc->coarse_phi_B) {
4933:         MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4934:       }
4935:       if (pcbddc->coarse_phi_D) {
4936:         MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4937:       }
4938:       if (pcbddc->coarse_psi_B) {
4939:         MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4940:       }
4941:       if (pcbddc->coarse_psi_D) {
4942:         MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4943:       }
4944:     }
4945:   }
4946:   /* get back data */
4947:   *coarse_submat_vals_n = coarse_submat_vals;
4948:   return(0);
4949: }

4951: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4952: {
4953:   Mat            *work_mat;
4954:   IS             isrow_s,iscol_s;
4955:   PetscBool      rsorted,csorted;
4956:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4960:   ISSorted(isrow,&rsorted);
4961:   ISSorted(iscol,&csorted);
4962:   ISGetLocalSize(isrow,&rsize);
4963:   ISGetLocalSize(iscol,&csize);

4965:   if (!rsorted) {
4966:     const PetscInt *idxs;
4967:     PetscInt *idxs_sorted,i;

4969:     PetscMalloc1(rsize,&idxs_perm_r);
4970:     PetscMalloc1(rsize,&idxs_sorted);
4971:     for (i=0;i<rsize;i++) {
4972:       idxs_perm_r[i] = i;
4973:     }
4974:     ISGetIndices(isrow,&idxs);
4975:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4976:     for (i=0;i<rsize;i++) {
4977:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4978:     }
4979:     ISRestoreIndices(isrow,&idxs);
4980:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4981:   } else {
4982:     PetscObjectReference((PetscObject)isrow);
4983:     isrow_s = isrow;
4984:   }

4986:   if (!csorted) {
4987:     if (isrow == iscol) {
4988:       PetscObjectReference((PetscObject)isrow_s);
4989:       iscol_s = isrow_s;
4990:     } else {
4991:       const PetscInt *idxs;
4992:       PetscInt       *idxs_sorted,i;

4994:       PetscMalloc1(csize,&idxs_perm_c);
4995:       PetscMalloc1(csize,&idxs_sorted);
4996:       for (i=0;i<csize;i++) {
4997:         idxs_perm_c[i] = i;
4998:       }
4999:       ISGetIndices(iscol,&idxs);
5000:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
5001:       for (i=0;i<csize;i++) {
5002:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
5003:       }
5004:       ISRestoreIndices(iscol,&idxs);
5005:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
5006:     }
5007:   } else {
5008:     PetscObjectReference((PetscObject)iscol);
5009:     iscol_s = iscol;
5010:   }

5012:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

5014:   if (!rsorted || !csorted) {
5015:     Mat      new_mat;
5016:     IS       is_perm_r,is_perm_c;

5018:     if (!rsorted) {
5019:       PetscInt *idxs_r,i;
5020:       PetscMalloc1(rsize,&idxs_r);
5021:       for (i=0;i<rsize;i++) {
5022:         idxs_r[idxs_perm_r[i]] = i;
5023:       }
5024:       PetscFree(idxs_perm_r);
5025:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
5026:     } else {
5027:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
5028:     }
5029:     ISSetPermutation(is_perm_r);

5031:     if (!csorted) {
5032:       if (isrow_s == iscol_s) {
5033:         PetscObjectReference((PetscObject)is_perm_r);
5034:         is_perm_c = is_perm_r;
5035:       } else {
5036:         PetscInt *idxs_c,i;
5037:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5038:         PetscMalloc1(csize,&idxs_c);
5039:         for (i=0;i<csize;i++) {
5040:           idxs_c[idxs_perm_c[i]] = i;
5041:         }
5042:         PetscFree(idxs_perm_c);
5043:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5044:       }
5045:     } else {
5046:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5047:     }
5048:     ISSetPermutation(is_perm_c);

5050:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5051:     MatDestroy(&work_mat[0]);
5052:     work_mat[0] = new_mat;
5053:     ISDestroy(&is_perm_r);
5054:     ISDestroy(&is_perm_c);
5055:   }

5057:   PetscObjectReference((PetscObject)work_mat[0]);
5058:   *B = work_mat[0];
5059:   MatDestroyMatrices(1,&work_mat);
5060:   ISDestroy(&isrow_s);
5061:   ISDestroy(&iscol_s);
5062:   return(0);
5063: }

5065: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5066: {
5067:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5068:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5069:   Mat            new_mat,lA;
5070:   IS             is_local,is_global;
5071:   PetscInt       local_size;
5072:   PetscBool      isseqaij;

5076:   MatDestroy(&pcbddc->local_mat);
5077:   MatGetSize(matis->A,&local_size,NULL);
5078:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5079:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5080:   ISDestroy(&is_local);
5081:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5082:   ISDestroy(&is_global);

5084:   if (pcbddc->dbg_flag) {
5085:     Vec       x,x_change;
5086:     PetscReal error;

5088:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5089:     VecSetRandom(x,NULL);
5090:     MatMult(ChangeOfBasisMatrix,x,x_change);
5091:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5092:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5093:     MatMult(new_mat,matis->x,matis->y);
5094:     if (!pcbddc->change_interior) {
5095:       const PetscScalar *x,*y,*v;
5096:       PetscReal         lerror = 0.;
5097:       PetscInt          i;

5099:       VecGetArrayRead(matis->x,&x);
5100:       VecGetArrayRead(matis->y,&y);
5101:       VecGetArrayRead(matis->counter,&v);
5102:       for (i=0;i<local_size;i++)
5103:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5104:           lerror = PetscAbsScalar(x[i]-y[i]);
5105:       VecRestoreArrayRead(matis->x,&x);
5106:       VecRestoreArrayRead(matis->y,&y);
5107:       VecRestoreArrayRead(matis->counter,&v);
5108:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5109:       if (error > PETSC_SMALL) {
5110:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5111:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5112:         } else {
5113:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5114:         }
5115:       }
5116:     }
5117:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5118:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5119:     VecAXPY(x,-1.0,x_change);
5120:     VecNorm(x,NORM_INFINITY,&error);
5121:     if (error > PETSC_SMALL) {
5122:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5123:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5124:       } else {
5125:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5126:       }
5127:     }
5128:     VecDestroy(&x);
5129:     VecDestroy(&x_change);
5130:   }

5132:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5133:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

5135:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5136:   PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5137:   if (isseqaij) {
5138:     MatDestroy(&pcbddc->local_mat);
5139:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5140:     if (lA) {
5141:       Mat work;
5142:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5143:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5144:       MatDestroy(&work);
5145:     }
5146:   } else {
5147:     Mat work_mat;

5149:     MatDestroy(&pcbddc->local_mat);
5150:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5151:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5152:     MatDestroy(&work_mat);
5153:     if (lA) {
5154:       Mat work;
5155:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5156:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5157:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5158:       MatDestroy(&work);
5159:     }
5160:   }
5161:   if (matis->A->symmetric_set) {
5162:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5163: #if !defined(PETSC_USE_COMPLEX)
5164:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5165: #endif
5166:   }
5167:   MatDestroy(&new_mat);
5168:   return(0);
5169: }

5171: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5172: {
5173:   PC_IS*          pcis = (PC_IS*)(pc->data);
5174:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5175:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5176:   PetscInt        *idx_R_local=NULL;
5177:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5178:   PetscInt        vbs,bs;
5179:   PetscBT         bitmask=NULL;
5180:   PetscErrorCode  ierr;

5183:   /*
5184:     No need to setup local scatters if
5185:       - primal space is unchanged
5186:         AND
5187:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5188:         AND
5189:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5190:   */
5191:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5192:     return(0);
5193:   }
5194:   /* destroy old objects */
5195:   ISDestroy(&pcbddc->is_R_local);
5196:   VecScatterDestroy(&pcbddc->R_to_B);
5197:   VecScatterDestroy(&pcbddc->R_to_D);
5198:   /* Set Non-overlapping dimensions */
5199:   n_B = pcis->n_B;
5200:   n_D = pcis->n - n_B;
5201:   n_vertices = pcbddc->n_vertices;

5203:   /* Dohrmann's notation: dofs are split into R (Remaining: all dofs but the vertices) and V (Vertices) */

5205:   /* create auxiliary bitmask and allocate workspace */
5206:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5207:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5208:     PetscBTCreate(pcis->n,&bitmask);
5209:     for (i=0;i<n_vertices;i++) {
5210:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5211:     }

5213:     for (i=0, n_R=0; i<pcis->n; i++) {
5214:       if (!PetscBTLookup(bitmask,i)) {
5215:         idx_R_local[n_R++] = i;
5216:       }
5217:     }
5218:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5219:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5221:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5222:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5223:   }

5225:   /* Block code */
5226:   vbs = 1;
5227:   MatGetBlockSize(pcbddc->local_mat,&bs);
5228:   if (bs>1 && !(n_vertices%bs)) {
5229:     PetscBool is_blocked = PETSC_TRUE;
5230:     PetscInt  *vary;
5231:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5232:       PetscMalloc1(pcis->n/bs,&vary);
5233:       PetscArrayzero(vary,pcis->n/bs);
5234:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5235:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5236:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5237:       for (i=0; i<pcis->n/bs; i++) {
5238:         if (vary[i]!=0 && vary[i]!=bs) {
5239:           is_blocked = PETSC_FALSE;
5240:           break;
5241:         }
5242:       }
5243:       PetscFree(vary);
5244:     } else {
5245:       /* Verify directly the R set */
5246:       for (i=0; i<n_R/bs; i++) {
5247:         PetscInt j,node=idx_R_local[bs*i];
5248:         for (j=1; j<bs; j++) {
5249:           if (node != idx_R_local[bs*i+j]-j) {
5250:             is_blocked = PETSC_FALSE;
5251:             break;
5252:           }
5253:         }
5254:       }
5255:     }
5256:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5257:       vbs = bs;
5258:       for (i=0;i<n_R/vbs;i++) {
5259:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5260:       }
5261:     }
5262:   }
5263:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5264:   if (sub_schurs && sub_schurs->reuse_solver) {
5265:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5267:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5268:     ISDestroy(&reuse_solver->is_R);
5269:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5270:     reuse_solver->is_R = pcbddc->is_R_local;
5271:   } else {
5272:     PetscFree(idx_R_local);
5273:   }

5275:   /* print some info if requested */
5276:   if (pcbddc->dbg_flag) {
5277:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5278:     PetscViewerFlush(pcbddc->dbg_viewer);
5279:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5280:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5281:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5282:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5283:     PetscViewerFlush(pcbddc->dbg_viewer);
5284:   }

5286:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5287:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5288:     IS       is_aux1,is_aux2;
5289:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5291:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5292:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5293:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5294:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5295:     for (i=0; i<n_D; i++) {
5296:       PetscBTSet(bitmask,is_indices[i]);
5297:     }
5298:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5299:     for (i=0, j=0; i<n_R; i++) {
5300:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5301:         aux_array1[j++] = i;
5302:       }
5303:     }
5304:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5305:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5306:     for (i=0, j=0; i<n_B; i++) {
5307:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5308:         aux_array2[j++] = i;
5309:       }
5310:     }
5311:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5312:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5313:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5314:     ISDestroy(&is_aux1);
5315:     ISDestroy(&is_aux2);

5317:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5318:       PetscMalloc1(n_D,&aux_array1);
5319:       for (i=0, j=0; i<n_R; i++) {
5320:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5321:           aux_array1[j++] = i;
5322:         }
5323:       }
5324:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5325:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5326:       ISDestroy(&is_aux1);
5327:     }
5328:     PetscBTDestroy(&bitmask);
5329:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5330:   } else {
5331:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5332:     IS                 tis;
5333:     PetscInt           schur_size;

5335:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5336:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5337:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5338:     ISDestroy(&tis);
5339:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5340:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5341:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5342:       ISDestroy(&tis);
5343:     }
5344:   }
5345:   return(0);
5346: }

/*
   MatNullSpacePropagateAny_Private - Derives a near null space for B from the
   null space (or, if absent, the near null space) attached to A.

   Input Parameters:
+  A  - matrix providing the source (near) null space
.  is - index set used as the source indices when the scatter from A's vectors
        to B's vectors has to be created in this routine
-  B  - matrix receiving the near null space; when both is and B are NULL, A is
        cast to Mat_IS, its local matrix is used as B and matis->cctx is used
        as the scatter

   Notes:
   Returns immediately when, in the non-MATIS branch, B already has a null
   space or a near null space, or when A has neither.
   The source vectors are scattered into vectors that share one contiguous
   array; a vector of ones is appended when the source space reports the
   constant; the set goes through PCBDDCOrthonormalizeVecs and is attached to B
   with MatSetNearNullSpace. A dense matrix wrapping the same array is composed
   on the new null space under the key "_PBDDC_Null_dmat".
*/
5348: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5349: {
5350:   MatNullSpace   NullSpace;
5351:   Mat            dmat;
5352:   const Vec      *nullvecs;
5353:   Vec            v,v2,*nullvecs2;
5354:   VecScatter     sct = NULL;
5355:   PetscContainer c;
5356:   PetscScalar    *ddata;
5357:   PetscInt       k,nnsp_size,bsiz,bsiz2,n,N,bs;
5358:   PetscBool      nnsp_has_cnst;

5362:   if (!is && !B) { /* MATIS */
5363:     Mat_IS* matis = (Mat_IS*)A->data;

        /* NOTE(review): B is always NULL inside this branch, so this test is redundant */
5365:     if (!B) {
5366:       MatISGetLocalMat(A,&B);
5367:     }
5368:     sct  = matis->cctx;
        /* extra reference balances the VecScatterDestroy at the end of the routine */
5369:     PetscObjectReference((PetscObject)sct);
5370:   } else {
        /* nothing to do if B already carries a null space or a near null space */
5371:     MatGetNullSpace(B,&NullSpace);
5372:     if (!NullSpace) {
5373:       MatGetNearNullSpace(B,&NullSpace);
5374:     }
5375:     if (NullSpace) return(0);
5376:   }
      /* source space: null space of A, falling back to its near null space */
5377:   MatGetNullSpace(A,&NullSpace);
5378:   if (!NullSpace) {
5379:     MatGetNearNullSpace(A,&NullSpace);
5380:   }
5381:   if (!NullSpace) return(0);

5383:   MatCreateVecs(A,&v,NULL);
5384:   MatCreateVecs(B,&v2,NULL);
5385:   if (!sct) {
5386:     VecScatterCreate(v,is,v2,NULL,&sct);
5387:   }
5388:   MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
      /* one extra slot for the constant vector when the source space has it */
5389:   bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5390:   PetscMalloc1(bsiz,&nullvecs2);
5391:   VecGetBlockSize(v2,&bs);
5392:   VecGetSize(v2,&N);
5393:   VecGetLocalSize(v2,&n);
      /* single array backing all the target vectors, n entries per vector */
5394:   PetscMalloc1(n*bsiz,&ddata);
5395:   for (k=0;k<nnsp_size;k++) {
        /* vector k wraps the k-th slice of ddata and receives the scattered source vector */
5396:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5397:     VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5398:     VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5399:   }
5400:   if (nnsp_has_cnst) {
        /* the constant is stored explicitly as a vector of ones in the last slice */
5401:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5402:     VecSet(nullvecs2[nnsp_size],1.0);
5403:   }
      /* bsiz2 is passed by address; presumably it comes back as the number of vectors retained -- TODO confirm */
5404:   PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
      /* from here on NullSpace refers to the newly created space, not to the one queried from A */
5405:   MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);

      /* dense matrix view on the first bsiz2 columns of ddata */
5407:   MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
      /* the container holds ddata and is composed on dmat; PetscContainerUserDestroyDefault is its destroy
         callback (presumably freeing ddata when the container goes away -- confirm) */
5408:   PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5409:   PetscContainerSetPointer(c,ddata);
5410:   PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5411:   PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5412:   PetscContainerDestroy(&c);
5413:   PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5414:   MatDestroy(&dmat);

      /* loop runs over bsiz (all vectors created above), not over bsiz2 */
5416:   for (k=0;k<bsiz;k++) {
5417:     VecDestroy(&nullvecs2[k]);
5418:   }
5419:   PetscFree(nullvecs2);
5420:   MatSetNearNullSpace(B,NullSpace);
5421:   MatNullSpaceDestroy(&NullSpace);
5422:   VecDestroy(&v);
5423:   VecDestroy(&v2);
5424:   VecScatterDestroy(&sct);
5425:   return(0);
5426: }

/*
   PCBDDCSetUpLocalSolvers - Creates (when missing) and sets up the two local
   solvers of the BDDC preconditioner.

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - if true, set up pcbddc->ksp_D with operators pcis->A_II / pcis->pA_II
-  neumann   - if true, set up pcbddc->ksp_R on the submatrix of pcbddc->local_mat
               selected by pcbddc->is_R_local (or on the operator of the reused
               correction solver)

   Notes:
   Options prefixes are derived from the prefix of pc ("pc_bddc_dirichlet_" and
   "pc_bddc_neumann_", plus a level tag on coarser levels).
   KSPSetFromOptions is called only in the call that creates the corresponding KSP.
   When pcbddc->dbg_flag is set, each requested solver is tested on a random
   vector and the infinity norm of the error is printed on pcbddc->dbg_viewer.
*/
5428: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5429: {
5430:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5431:   PC_IS          *pcis = (PC_IS*)pc->data;
5432:   PC             pc_temp;
5433:   Mat            A_RR;
5434:   MatNullSpace   nnsp;
5435:   MatReuse       reuse;
5436:   PetscScalar    m_one = -1.0;
5437:   PetscReal      value;
5438:   PetscInt       n_D,n_R;
5439:   PetscBool      issbaij,opts;
5441:   void           (*f)(void) = NULL;
5442:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5443:   size_t         len;

5446:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5447:   /* approximate solver, propagate NearNullSpace if needed */
5448:   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5449:     MatNullSpace gnnsp1,gnnsp2;
5450:     PetscBool    lhas,ghas;

5452:     MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5453:     MatGetNearNullSpace(pc->pmat,&gnnsp1);
5454:     MatGetNullSpace(pc->pmat,&gnnsp2);
5455:     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
        /* logical OR over the communicator of pc: ghas is true if any process has a local near null space */
5456:     MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
        /* propagate from pc->pmat only if no process has one locally and pmat has a (near) null space */
5457:     if (!ghas && (gnnsp1 || gnnsp2)) {
5458:       MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5459:     }
5460:   }

5462:   /* compute prefixes */
5463:   PetscStrcpy(dir_prefix,"");
5464:   PetscStrcpy(neu_prefix,"");
5465:   if (!pcbddc->current_level) {
        /* level 0: prefix of pc followed by the solver-specific suffix */
5466:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5467:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5468:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5469:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5470:   } else {
        /* coarser levels: strip the trailing coarse tag from the prefix of pc, then append
           the solver suffix and the "l<level>_" tag */
5471:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5472:     PetscStrlen(((PetscObject)pc)->prefix,&len);
        /* NOTE(review): len is a size_t; the subtractions below assume the prefix really ends
           with the expected coarse tag, otherwise they would wrap around */
5473:     len -= 15; /* remove "pc_bddc_coarse_" */
5474:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5475:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5476:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5477:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5478:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5479:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5480:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5481:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5482:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5483:   }

5485:   /* DIRICHLET PROBLEM */
5486:   if (dirichlet) {
5487:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5488:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
          /* benign trick without explicit change of basis is supported only together with a reuse solver */
5489:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5490:       if (pcbddc->dbg_flag) {
5491:         Mat    A_IIn;

            /* in debug mode A_II is replaced by the matrix returned by PCBDDCBenignProject */
5493:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5494:         MatDestroy(&pcis->A_II);
5495:         pcis->A_II = A_IIn;
5496:       }
5497:     }
        /* forward the symmetry flag of the local matrix, if it was set, to A_II */
5498:     if (pcbddc->local_mat->symmetric_set) {
5499:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5500:     }
5501:     /* Matrix for Dirichlet problem is pcis->A_II */
5502:     n_D  = pcis->n - pcis->n_B;
        /* opts records whether the KSP is created in this call, so that options are processed once */
5503:     opts = PETSC_FALSE;
5504:     if (!pcbddc->ksp_D) { /* create object if not yet built */
5505:       opts = PETSC_TRUE;
5506:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5507:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5508:       /* default */
5509:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5510:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5511:       PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5512:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
          /* default factorization: Cholesky for SEQSBAIJ, LU otherwise */
5513:       if (issbaij) {
5514:         PCSetType(pc_temp,PCCHOLESKY);
5515:       } else {
5516:         PCSetType(pc_temp,PCLU);
5517:       }
5518:       KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5519:     }
5520:     MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5521:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5522:     /* Allow user's customization */
5523:     if (opts) {
5524:       KSPSetFromOptions(pcbddc->ksp_D);
5525:     }
5526:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5527:     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5528:       MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5529:     }
        /* query again: the propagation above may have attached a near null space */
5530:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5531:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
5532:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
        /* pass coordinates only if the PC accepts them, the graph has them, and no near null space is set */
5533:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5534:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5535:       const PetscInt *idxs;
5536:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5538:       ISGetLocalSize(pcis->is_I_local,&nl);
5539:       ISGetIndices(pcis->is_I_local,&idxs);
5540:       PetscMalloc1(nl*cdim,&scoords);
          /* gather the cdim coordinates of each index listed in is_I_local */
5541:       for (i=0;i<nl;i++) {
5542:         for (d=0;d<cdim;d++) {
5543:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5544:         }
5545:       }
5546:       ISRestoreIndices(pcis->is_I_local,&idxs);
5547:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5548:       PetscFree(scoords);
5549:     }
5550:     if (sub_schurs && sub_schurs->reuse_solver) {
5551:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* replace the PC of ksp_D with the interior solver provided by the reuse structure */
5553:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5554:     }

5556:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5557:     if (!n_D) {
5558:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5559:       PCSetType(pc_temp,PCNONE);
5560:     }
5561:     KSPSetUp(pcbddc->ksp_D);
5562:     /* set ksp_D into pcis data */
        /* the reference is taken before destroying the previous pcis->ksp_D */
5563:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5564:     KSPDestroy(&pcis->ksp_D);
5565:     pcis->ksp_D = pcbddc->ksp_D;
5566:   }

5568:   /* NEUMANN PROBLEM */
5569:   A_RR = NULL;
5570:   if (neumann) {
5571:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5572:     PetscInt        ibs,mbs;
        /* NOTE: this issbaij shadows the function-scope variable of the same name */
5573:     PetscBool       issbaij, reuse_neumann_solver;
5574:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

5576:     reuse_neumann_solver = PETSC_FALSE;
5577:     if (sub_schurs && sub_schurs->reuse_solver) {
5578:       IS iP;

5580:       reuse_neumann_solver = PETSC_TRUE;
          /* reuse is disabled when an object is composed on sub_schurs->A under "__KSPFETIDP_iP" */
5581:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5582:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5583:     }
5584:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5585:     ISGetSize(pcbddc->is_R_local,&n_R);
5586:     if (pcbddc->ksp_R) { /* already created ksp */
5587:       PetscInt nn_R;
5588:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
          /* take our own reference; it is dropped by one of the MatDestroy calls below or at the end */
5589:       PetscObjectReference((PetscObject)A_RR);
5590:       MatGetSize(A_RR,&nn_R,NULL);
5591:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5592:         KSPReset(pcbddc->ksp_R);
5593:         MatDestroy(&A_RR);
5594:         reuse = MAT_INITIAL_MATRIX;
5595:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5596:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5597:           MatDestroy(&A_RR);
5598:           reuse = MAT_INITIAL_MATRIX;
5599:         } else { /* safe to reuse the matrix */
5600:           reuse = MAT_REUSE_MATRIX;
5601:         }
5602:       }
5603:       /* last check */
5604:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5605:         MatDestroy(&A_RR);
5606:         reuse = MAT_INITIAL_MATRIX;
5607:       }
5608:     } else { /* first time, so we need to create the matrix */
5609:       reuse = MAT_INITIAL_MATRIX;
5610:     }
5611:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5612:        TODO: Get Rid of these conversions */
5613:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5614:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5615:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
        /* in both cases below: if local_mat is the same object as matis->A, convert into a new matrix
           (matis->A is left untouched); otherwise convert local_mat in place */
5616:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5617:       if (matis->A == pcbddc->local_mat) {
5618:         MatDestroy(&pcbddc->local_mat);
5619:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5620:       } else {
5621:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5622:       }
5623:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5624:       if (matis->A == pcbddc->local_mat) {
5625:         MatDestroy(&pcbddc->local_mat);
5626:         MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5627:       } else {
5628:         MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5629:       }
5630:     }
5631:     /* extract A_RR */
5632:     if (reuse_neumann_solver) {
5633:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5635:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5636:         MatDestroy(&A_RR);
5637:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5638:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5639:         } else {
5640:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5641:         }
5642:       } else {
            /* no explicit matrix needed: use the operator of the reused correction solver */
5643:         MatDestroy(&A_RR);
5644:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5645:         PetscObjectReference((PetscObject)A_RR);
5646:       }
5647:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5648:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5649:     }
5650:     if (pcbddc->local_mat->symmetric_set) {
5651:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5652:     }
5653:     opts = PETSC_FALSE;
5654:     if (!pcbddc->ksp_R) { /* create object if not present */
5655:       opts = PETSC_TRUE;
5656:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5657:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5658:       /* default */
5659:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5660:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5661:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5662:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5663:       if (issbaij) {
5664:         PCSetType(pc_temp,PCCHOLESKY);
5665:       } else {
5666:         PCSetType(pc_temp,PCLU);
5667:       }
5668:       KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5669:     }
5670:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5671:     MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5672:     if (opts) { /* Allow user's customization once */
5673:       KSPSetFromOptions(pcbddc->ksp_R);
5674:     }
5675:     MatGetNearNullSpace(A_RR,&nnsp);
5676:     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5677:       MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5678:     }
        /* query again: the propagation above may have attached a near null space */
5679:     MatGetNearNullSpace(A_RR,&nnsp);
5680:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
5681:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
        /* same coordinate hand-off as for the Dirichlet solver, restricted to is_R_local */
5682:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5683:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5684:       const PetscInt *idxs;
5685:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5687:       ISGetLocalSize(pcbddc->is_R_local,&nl);
5688:       ISGetIndices(pcbddc->is_R_local,&idxs);
5689:       PetscMalloc1(nl*cdim,&scoords);
5690:       for (i=0;i<nl;i++) {
5691:         for (d=0;d<cdim;d++) {
5692:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5693:         }
5694:       }
5695:       ISRestoreIndices(pcbddc->is_R_local,&idxs);
5696:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5697:       PetscFree(scoords);
5698:     }

5700:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5701:     if (!n_R) {
5702:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5703:       PCSetType(pc_temp,PCNONE);
5704:     }
5705:     /* Reuse solver if it is present */
5706:     if (reuse_neumann_solver) {
5707:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5709:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5710:     }
5711:     KSPSetUp(pcbddc->ksp_R);
5712:   }

5714:   if (pcbddc->dbg_flag) {
5715:     PetscViewerFlush(pcbddc->dbg_viewer);
5716:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5717:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5718:   }
5719:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);

5721:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5722:   if (pcbddc->NullSpace_corr[0]) {
5723:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5724:   }
5725:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5726:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5727:   }
5728:   if (neumann && pcbddc->NullSpace_corr[2]) {
5729:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5730:   }
5731:   /* check Dirichlet and Neumann solvers */
      /* test: pick a random x, form A*x, solve in place, then report the infinity norm of x - solve(A*x) */
5732:   if (pcbddc->dbg_flag) {
5733:     if (dirichlet) { /* Dirichlet */
5734:       VecSetRandom(pcis->vec1_D,NULL);
5735:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5736:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5737:       KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5738:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5739:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5740:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5741:       PetscViewerFlush(pcbddc->dbg_viewer);
5742:     }
5743:     if (neumann) { /* Neumann */
5744:       VecSetRandom(pcbddc->vec1_R,NULL);
5745:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5746:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5747:       KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5748:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5749:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5750:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5751:       PetscViewerFlush(pcbddc->dbg_viewer);
5752:     }
5753:   }
5754:   /* free Neumann problem's matrix */
      /* A_RR is NULL here when neumann is false (it is set to NULL before the Neumann branch) */
5755:   MatDestroy(&A_RR);
5756:   return(0);
5757: }

/*
   PCBDDCSolveSubstructureCorrection - Applies the local correction solve to the
   data in inout_B (and in inout_D when pcbddc->switch_static is set).

   Input Parameters:
+  pc             - the preconditioner (pc->data is read as PC_BDDC)
.  inout_B        - on entry the right-hand side; overwritten with the result
.  inout_D        - used only when pcbddc->switch_static is set; read as
                    right-hand side and overwritten with the result
-  applytranspose - if true the transposed solve is used, and the auxiliary
                    matrices local_auxmat1/local_auxmat2 are applied
                    (transposed) before the solve instead of after it

   Notes:
   Two solve paths are visible here: KSPSolve/KSPSolveTranspose on
   pcbddc->ksp_R through pcbddc->vec1_R, or, when a reuse solver exists and
   switch_static is off, MatFactorSolveSchurComplement(Transpose) on
   reuse_solver->F through reuse_solver->rhs_B / sol_B.
*/
5759: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5760: {
5761:   PetscErrorCode  ierr;
5762:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5763:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
      /* boolean flag; note that inner scopes below declare a PCBDDCReuseSolvers handle with the same name */
5764:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;

5767:   if (!reuse_solver) {
5768:     VecSet(pcbddc->vec1_R,0.);
5769:   }
      /* --- load the right-hand side --- */
5770:   if (!pcbddc->switch_static) {
5771:     if (applytranspose && pcbddc->local_auxmat1) {
          /* inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B) */
5772:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5773:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5774:     }
5775:     if (!reuse_solver) {
5776:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5777:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5778:     } else {
5779:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5781:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5782:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5783:     }
5784:   } else {
        /* switch_static: vec1_R is filled from both inout_B and inout_D */
5785:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5786:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5787:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5788:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5789:     if (applytranspose && pcbddc->local_auxmat1) {
          /* here local_auxmat2^T acts on vec1_R; the updated inout_B is scattered into vec1_R again */
5790:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5791:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5792:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5793:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5794:     }
5795:   }
      /* --- solve (logged under PC_BDDC_Solves[level][1]) --- */
5796:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5797:   if (!reuse_solver || pcbddc->switch_static) {
        /* in-place solve on vec1_R */
5798:     if (applytranspose) {
5799:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5800:     } else {
5801:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5802:     }
5803:     KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5804:   } else {
5805:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5807:     if (applytranspose) {
5808:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5809:     } else {
5810:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5811:     }
5812:   }
5813:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
      /* --- unload the solution --- */
5814:   VecSet(inout_B,0.);
5815:   if (!pcbddc->switch_static) {
5816:     if (!reuse_solver) {
5817:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5818:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5819:     } else {
5820:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5822:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5823:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5824:     }
5825:     if (!applytranspose && pcbddc->local_auxmat1) {
          /* inout_B += local_auxmat2 * (local_auxmat1 * inout_B) */
5826:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5827:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5828:     }
5829:   } else {
5830:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5831:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5832:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5833:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5834:     if (!applytranspose && pcbddc->local_auxmat1) {
          /* the correction is accumulated in vec1_R, which is scattered out once more below */
5835:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5836:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5837:     }
5838:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5839:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5840:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5841:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5842:   }
5843:   return(0);
5844: }

5846: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
/*
   PCBDDCApplyInterfacePreconditioner - Combines the coarse and the local
   corrections, working in place on pcis->vec1_B (and on pcis->vec1_D when
   pcbddc->switch_static is set).

   Input Parameters:
+  pc             - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  applytranspose - selects the transposed application: the roles of
                    coarse_phi_* and coarse_psi_* are swapped and the coarse
                    problem is solved with KSPSolveTranspose

   Notes:
   Sequence visible in this routine: restriction to pcbddc->vec1_P, scatter to
   pcbddc->coarse_vec, coarse solve on the processes that own
   pcbddc->coarse_ksp, local solve through PCBDDCSolveSubstructureCorrection,
   scatter of the coarse solution back to vec1_P, and sum of the two
   contributions. When pcbddc->benign_apply_coarse_only is set, the restriction
   and the local solve are skipped and only the expanded coarse solution is
   returned in pcis->vec1_B.
*/
5847: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5848: {
5850:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5851:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5852:   const PetscScalar zero = 0.0;

5855:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5856:   if (!pcbddc->benign_apply_coarse_only) {
5857:     if (applytranspose) {
5858:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5859:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5860:     } else {
5861:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5862:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5863:     }
5864:   } else {
5865:     VecSet(pcbddc->vec1_P,zero);
5866:   }

5868:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5869:   if (pcbddc->benign_n) {
5870:     PetscScalar *array;
5871:     PetscInt    j;

5873:     VecGetArray(pcbddc->vec1_P,&array);
        /* the benign_n values occupy the last benign_n entries of the local primal vector */
5874:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5875:     VecRestoreArray(pcbddc->vec1_P,&array);
5876:   }

5878:   /* start communications from local primal nodes to rhs of coarse solver */
5879:   VecSet(pcbddc->coarse_vec,zero);
5880:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5881:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5883:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5884:   if (pcbddc->coarse_ksp) {
5885:     Mat          coarse_mat;
5886:     Vec          rhs,sol;
5887:     MatNullSpace nullsp;
5888:     PetscBool    isbddc = PETSC_FALSE;

5890:     if (pcbddc->benign_have_null) {
5891:       PC        coarse_pc;

5893:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5894:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5895:       /* we need to propagate to coarser levels the need for a possible benign correction */
5896:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5897:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5898:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5899:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5900:       }
5901:     }
5902:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5903:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5904:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5905:     if (applytranspose) {
5906:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5907:       PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5908:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5909:       PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5910:       KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
          /* transposed solve: the transpose null space of the coarse matrix is removed from the solution */
5911:       MatGetTransposeNullSpace(coarse_mat,&nullsp);
5912:       if (nullsp) {
5913:         MatNullSpaceRemove(nullsp,sol);
5914:       }
5915:     } else {
5916:       MatGetNullSpace(coarse_mat,&nullsp);
5917:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5918:         PC        coarse_pc;

            /* in this branch the null space is removed from the right-hand side, not from the solution */
5920:         if (nullsp) {
5921:           MatNullSpaceRemove(nullsp,rhs);
5922:         }
5923:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5924:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5925:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5926:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5927:       } else {
5928:         PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5929:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5930:         PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5931:         KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5932:         if (nullsp) {
5933:           MatNullSpaceRemove(nullsp,sol);
5934:         }
5935:       }
5936:     }
5937:     /* we don't need the benign correction at coarser levels anymore */
5938:     if (pcbddc->benign_have_null && isbddc) {
5939:       PC        coarse_pc;
5940:       PC_BDDC*  coarsepcbddc;

5942:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5943:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5944:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5945:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5946:     }
5947:   }

5949:   /* Local solution on R nodes */
      /* skipped when the local size pcis->n is zero or when only the coarse part is requested */
5950:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5951:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5952:   }
5953:   /* communications from coarse sol to local primal nodes */
5954:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5955:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5957:   /* Sum contributions from the two levels */
5958:   if (!pcbddc->benign_apply_coarse_only) {
        /* prolongation uses the opposite basis with respect to the restriction at the top of the routine */
5959:     if (applytranspose) {
5960:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5961:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5962:     } else {
5963:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5964:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5965:     }
5966:     /* store p0 */
5967:     if (pcbddc->benign_n) {
5968:       PetscScalar *array;
5969:       PetscInt    j;

5971:       VecGetArray(pcbddc->vec1_P,&array);
5972:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5973:       VecRestoreArray(pcbddc->vec1_P,&array);
5974:     }
5975:   } else { /* expand the coarse solution */
        /* MatMult (not MatMultAdd): vec1_B is overwritten with the expanded coarse solution */
5976:     if (applytranspose) {
5977:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5978:     } else {
5979:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5980:     }
5981:   }
5982:   return(0);
5983: }

5985: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5986: {
5987:   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
5988:   Vec               from,to;
5989:   const PetscScalar *array;
5990:   PetscErrorCode    ierr;

5993:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5994:     from = pcbddc->coarse_vec;
5995:     to = pcbddc->vec1_P;
5996:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5997:       Vec tvec;

5999:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6000:       VecResetArray(tvec);
6001:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
6002:       VecGetArrayRead(tvec,&array);
6003:       VecPlaceArray(from,array);
6004:       VecRestoreArrayRead(tvec,&array);
6005:     }
6006:   } else { /* from local to global -> put data in coarse right hand side */
6007:     from = pcbddc->vec1_P;
6008:     to = pcbddc->coarse_vec;
6009:   }
6010:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6011:   return(0);
6012: }

6014: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6015: {
6016:   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
6017:   Vec               from,to;
6018:   const PetscScalar *array;
6019:   PetscErrorCode    ierr;

6022:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6023:     from = pcbddc->coarse_vec;
6024:     to = pcbddc->vec1_P;
6025:   } else { /* from local to global -> put data in coarse right hand side */
6026:     from = pcbddc->vec1_P;
6027:     to = pcbddc->coarse_vec;
6028:   }
6029:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6030:   if (smode == SCATTER_FORWARD) {
6031:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6032:       Vec tvec;

6034:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6035:       VecGetArrayRead(to,&array);
6036:       VecPlaceArray(tvec,array);
6037:       VecRestoreArrayRead(to,&array);
6038:     }
6039:   } else {
6040:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6041:      VecResetArray(from);
6042:     }
6043:   }
6044:   return(0);
6045: }

6047: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6048: {
6049:   PetscErrorCode    ierr;
6050:   PC_IS*            pcis = (PC_IS*)(pc->data);
6051:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
6052:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
6053:   /* one and zero */
6054:   PetscScalar       one=1.0,zero=0.0;
6055:   /* space to store constraints and their local indices */
6056:   PetscScalar       *constraints_data;
6057:   PetscInt          *constraints_idxs,*constraints_idxs_B;
6058:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
6059:   PetscInt          *constraints_n;
6060:   /* iterators */
6061:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
6062:   /* BLAS integers */
6063:   PetscBLASInt      lwork,lierr;
6064:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
6065:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
6066:   /* reuse */
6067:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
6068:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
6069:   /* change of basis */
6070:   PetscBool         qr_needed;
6071:   PetscBT           change_basis,qr_needed_idx;
6072:   /* auxiliary stuff */
6073:   PetscInt          *nnz,*is_indices;
6074:   PetscInt          ncc;
6075:   /* some quantities */
6076:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
6077:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6078:   PetscReal         tol; /* tolerance for retaining eigenmodes */

6081:   tol  = PetscSqrtReal(PETSC_SMALL);
6082:   /* Destroy Mat objects computed previously */
6083:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6084:   MatDestroy(&pcbddc->ConstraintMatrix);
6085:   MatDestroy(&pcbddc->switch_static_change);
6086:   /* save info on constraints from previous setup (if any) */
6087:   olocal_primal_size = pcbddc->local_primal_size;
6088:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6089:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6090:   PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6091:   PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6092:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6093:   PetscFree(pcbddc->primal_indices_local_idxs);

6095:   if (!pcbddc->adaptive_selection) {
6096:     IS           ISForVertices,*ISForFaces,*ISForEdges;
6097:     MatNullSpace nearnullsp;
6098:     const Vec    *nearnullvecs;
6099:     Vec          *localnearnullsp;
6100:     PetscScalar  *array;
6101:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
6102:     PetscBool    nnsp_has_cnst;
6103:     /* LAPACK working arrays for SVD or POD */
6104:     PetscBool    skip_lapack,boolforchange;
6105:     PetscScalar  *work;
6106:     PetscReal    *singular_vals;
6107: #if defined(PETSC_USE_COMPLEX)
6108:     PetscReal    *rwork;
6109: #endif
6110:     PetscScalar  *temp_basis = NULL,*correlation_mat = NULL;
6111:     PetscBLASInt dummy_int=1;
6112:     PetscScalar  dummy_scalar=1.;
6113:     PetscBool    use_pod = PETSC_FALSE;

6115:     /* MKL SVD with same input gives different results on different processes! */
6116: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL)
6117:     use_pod = PETSC_TRUE;
6118: #endif
6119:     /* Get index sets for faces, edges and vertices from graph */
6120:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6121:     /* print some info */
6122:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6123:       PetscInt nv;

6125:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6126:       ISGetSize(ISForVertices,&nv);
6127:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6128:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6129:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6130:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6131:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6132:       PetscViewerFlush(pcbddc->dbg_viewer);
6133:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6134:     }

6136:     /* free unneeded index sets */
6137:     if (!pcbddc->use_vertices) {
6138:       ISDestroy(&ISForVertices);
6139:     }
6140:     if (!pcbddc->use_edges) {
6141:       for (i=0;i<n_ISForEdges;i++) {
6142:         ISDestroy(&ISForEdges[i]);
6143:       }
6144:       PetscFree(ISForEdges);
6145:       n_ISForEdges = 0;
6146:     }
6147:     if (!pcbddc->use_faces) {
6148:       for (i=0;i<n_ISForFaces;i++) {
6149:         ISDestroy(&ISForFaces[i]);
6150:       }
6151:       PetscFree(ISForFaces);
6152:       n_ISForFaces = 0;
6153:     }

6155:     /* check if near null space is attached to global mat */
6156:     if (pcbddc->use_nnsp) {
6157:       MatGetNearNullSpace(pc->pmat,&nearnullsp);
6158:     } else nearnullsp = NULL;

6160:     if (nearnullsp) {
6161:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6162:       /* remove any stored info */
6163:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
6164:       PetscFree(pcbddc->onearnullvecs_state);
6165:       /* store information for BDDC solver reuse */
6166:       PetscObjectReference((PetscObject)nearnullsp);
6167:       pcbddc->onearnullspace = nearnullsp;
6168:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6169:       for (i=0;i<nnsp_size;i++) {
6170:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6171:       }
6172:     } else { /* if near null space is not provided BDDC uses constants by default */
6173:       nnsp_size = 0;
6174:       nnsp_has_cnst = PETSC_TRUE;
6175:     }
6176:     /* get max number of constraints on a single cc */
6177:     max_constraints = nnsp_size;
6178:     if (nnsp_has_cnst) max_constraints++;

6180:     /*
6181:          Evaluate maximum storage size needed by the procedure
6182:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6183:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6184:          There can be multiple constraints per connected component
6185:                                                                                                                                                            */
6186:     n_vertices = 0;
6187:     if (ISForVertices) {
6188:       ISGetSize(ISForVertices,&n_vertices);
6189:     }
6190:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6191:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

6193:     total_counts = n_ISForFaces+n_ISForEdges;
6194:     total_counts *= max_constraints;
6195:     total_counts += n_vertices;
6196:     PetscBTCreate(total_counts,&change_basis);

6198:     total_counts = 0;
6199:     max_size_of_constraint = 0;
6200:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6201:       IS used_is;
6202:       if (i<n_ISForEdges) {
6203:         used_is = ISForEdges[i];
6204:       } else {
6205:         used_is = ISForFaces[i-n_ISForEdges];
6206:       }
6207:       ISGetSize(used_is,&j);
6208:       total_counts += j;
6209:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6210:     }
6211:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

6213:     /* get local part of global near null space vectors */
6214:     PetscMalloc1(nnsp_size,&localnearnullsp);
6215:     for (k=0;k<nnsp_size;k++) {
6216:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6217:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6218:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6219:     }

6221:     /* whether or not to skip lapack calls */
6222:     skip_lapack = PETSC_TRUE;
6223:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

6225:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6226:     if (!skip_lapack) {
6227:       PetscScalar temp_work;

6229:       if (use_pod) {
6230:         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6231:         PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6232:         PetscMalloc1(max_constraints,&singular_vals);
6233:         PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6234: #if defined(PETSC_USE_COMPLEX)
6235:         PetscMalloc1(3*max_constraints,&rwork);
6236: #endif
6237:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6238:         PetscBLASIntCast(max_constraints,&Blas_N);
6239:         PetscBLASIntCast(max_constraints,&Blas_LDA);
6240:         lwork = -1;
6241:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6242: #if !defined(PETSC_USE_COMPLEX)
6243:         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6244: #else
6245:         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6246: #endif
6247:         PetscFPTrapPop();
6248:         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6249:       } else {
6250: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6251:         /* SVD */
6252:         PetscInt max_n,min_n;
6253:         max_n = max_size_of_constraint;
6254:         min_n = max_constraints;
6255:         if (max_size_of_constraint < max_constraints) {
6256:           min_n = max_size_of_constraint;
6257:           max_n = max_constraints;
6258:         }
6259:         PetscMalloc1(min_n,&singular_vals);
6260: #if defined(PETSC_USE_COMPLEX)
6261:         PetscMalloc1(5*min_n,&rwork);
6262: #endif
6263:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6264:         lwork = -1;
6265:         PetscBLASIntCast(max_n,&Blas_M);
6266:         PetscBLASIntCast(min_n,&Blas_N);
6267:         PetscBLASIntCast(max_n,&Blas_LDA);
6268:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6269: #if !defined(PETSC_USE_COMPLEX)
6270:         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6271: #else
6272:         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6273: #endif
6274:         PetscFPTrapPop();
6275:         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6276: #else
6277:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6278: #endif /* on missing GESVD */
6279:       }
6280:       /* Allocate optimal workspace */
6281:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6282:       PetscMalloc1(lwork,&work);
6283:     }
6284:     /* Now we can loop on constraining sets */
6285:     total_counts = 0;
6286:     constraints_idxs_ptr[0] = 0;
6287:     constraints_data_ptr[0] = 0;
6288:     /* vertices */
6289:     if (n_vertices) {
6290:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6291:       PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6292:       for (i=0;i<n_vertices;i++) {
6293:         constraints_n[total_counts] = 1;
6294:         constraints_data[total_counts] = 1.0;
6295:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6296:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6297:         total_counts++;
6298:       }
6299:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6300:       n_vertices = total_counts;
6301:     }

6303:     /* edges and faces */
6304:     total_counts_cc = total_counts;
6305:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6306:       IS        used_is;
6307:       PetscBool idxs_copied = PETSC_FALSE;

6309:       if (ncc<n_ISForEdges) {
6310:         used_is = ISForEdges[ncc];
6311:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6312:       } else {
6313:         used_is = ISForFaces[ncc-n_ISForEdges];
6314:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6315:       }
6316:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6318:       ISGetSize(used_is,&size_of_constraint);
6319:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6320:       /* change of basis should not be performed on local periodic nodes */
6321:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6322:       if (nnsp_has_cnst) {
6323:         PetscScalar quad_value;

6325:         PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6326:         idxs_copied = PETSC_TRUE;

6328:         if (!pcbddc->use_nnsp_true) {
6329:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6330:         } else {
6331:           quad_value = 1.0;
6332:         }
6333:         for (j=0;j<size_of_constraint;j++) {
6334:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6335:         }
6336:         temp_constraints++;
6337:         total_counts++;
6338:       }
6339:       for (k=0;k<nnsp_size;k++) {
6340:         PetscReal real_value;
6341:         PetscScalar *ptr_to_data;

6343:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6344:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6345:         for (j=0;j<size_of_constraint;j++) {
6346:           ptr_to_data[j] = array[is_indices[j]];
6347:         }
6348:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6349:         /* check if array is null on the connected component */
6350:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6351:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6352:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6353:           temp_constraints++;
6354:           total_counts++;
6355:           if (!idxs_copied) {
6356:             PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6357:             idxs_copied = PETSC_TRUE;
6358:           }
6359:         }
6360:       }
6361:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6362:       valid_constraints = temp_constraints;
6363:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6364:         if (temp_constraints == 1) { /* just normalize the constraint */
6365:           PetscScalar norm,*ptr_to_data;

6367:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6368:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6369:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6370:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6371:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6372:         } else { /* perform SVD */
6373:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6375:           if (use_pod) {
6376:             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6377:                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6378:                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6379:                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6380:                   from that computed using LAPACKgesvd
6381:                -> This is due to a different computation of eigenvectors in LAPACKheev
6382:                -> The quality of the POD-computed basis will be the same */
6383:             PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6384:             /* Store upper triangular part of correlation matrix */
6385:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6386:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6387:             for (j=0;j<temp_constraints;j++) {
6388:               for (k=0;k<j+1;k++) {
6389:                 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6390:               }
6391:             }
6392:             /* compute eigenvalues and eigenvectors of correlation matrix */
6393:             PetscBLASIntCast(temp_constraints,&Blas_N);
6394:             PetscBLASIntCast(temp_constraints,&Blas_LDA);
6395: #if !defined(PETSC_USE_COMPLEX)
6396:             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6397: #else
6398:             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6399: #endif
6400:             PetscFPTrapPop();
6401:             if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6402:             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6403:             j = 0;
6404:             while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6405:             total_counts = total_counts-j;
6406:             valid_constraints = temp_constraints-j;
6407:             /* scale and copy POD basis into used quadrature memory */
6408:             PetscBLASIntCast(size_of_constraint,&Blas_M);
6409:             PetscBLASIntCast(temp_constraints,&Blas_N);
6410:             PetscBLASIntCast(temp_constraints,&Blas_K);
6411:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6412:             PetscBLASIntCast(temp_constraints,&Blas_LDB);
6413:             PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6414:             if (j<temp_constraints) {
6415:               PetscInt ii;
6416:               for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6417:               PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6418:               PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6419:               PetscFPTrapPop();
6420:               for (k=0;k<temp_constraints-j;k++) {
6421:                 for (ii=0;ii<size_of_constraint;ii++) {
6422:                   ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6423:                 }
6424:               }
6425:             }
6426:           } else {
6427: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6428:             PetscBLASIntCast(size_of_constraint,&Blas_M);
6429:             PetscBLASIntCast(temp_constraints,&Blas_N);
6430:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6431:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6432: #if !defined(PETSC_USE_COMPLEX)
6433:             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6434: #else
6435:             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6436: #endif
6437:             if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6438:             PetscFPTrapPop();
6439:             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6440:             k = temp_constraints;
6441:             if (k > size_of_constraint) k = size_of_constraint;
6442:             j = 0;
6443:             while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6444:             valid_constraints = k-j;
6445:             total_counts = total_counts-temp_constraints+valid_constraints;
6446: #else
6447:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6448: #endif /* on missing GESVD */
6449:           }
6450:         }
6451:       }
6452:       /* update pointers information */
6453:       if (valid_constraints) {
6454:         constraints_n[total_counts_cc] = valid_constraints;
6455:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6456:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6457:         /* set change_of_basis flag */
6458:         if (boolforchange) {
6459:           PetscBTSet(change_basis,total_counts_cc);
6460:         }
6461:         total_counts_cc++;
6462:       }
6463:     }
6464:     /* free workspace */
6465:     if (!skip_lapack) {
6466:       PetscFree(work);
6467: #if defined(PETSC_USE_COMPLEX)
6468:       PetscFree(rwork);
6469: #endif
6470:       PetscFree(singular_vals);
6471:       PetscFree(correlation_mat);
6472:       PetscFree(temp_basis);
6473:     }
6474:     for (k=0;k<nnsp_size;k++) {
6475:       VecDestroy(&localnearnullsp[k]);
6476:     }
6477:     PetscFree(localnearnullsp);
6478:     /* free index sets of faces, edges and vertices */
6479:     for (i=0;i<n_ISForFaces;i++) {
6480:       ISDestroy(&ISForFaces[i]);
6481:     }
6482:     if (n_ISForFaces) {
6483:       PetscFree(ISForFaces);
6484:     }
6485:     for (i=0;i<n_ISForEdges;i++) {
6486:       ISDestroy(&ISForEdges[i]);
6487:     }
6488:     if (n_ISForEdges) {
6489:       PetscFree(ISForEdges);
6490:     }
6491:     ISDestroy(&ISForVertices);
6492:   } else {
6493:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6495:     total_counts = 0;
6496:     n_vertices = 0;
6497:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6498:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6499:     }
6500:     max_constraints = 0;
6501:     total_counts_cc = 0;
6502:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6503:       total_counts += pcbddc->adaptive_constraints_n[i];
6504:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6505:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6506:     }
6507:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6508:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6509:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6510:     constraints_data = pcbddc->adaptive_constraints_data;
6511:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6512:     PetscMalloc1(total_counts_cc,&constraints_n);
6513:     total_counts_cc = 0;
6514:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6515:       if (pcbddc->adaptive_constraints_n[i]) {
6516:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6517:       }
6518:     }

6520:     max_size_of_constraint = 0;
6521:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6522:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6523:     /* Change of basis */
6524:     PetscBTCreate(total_counts_cc,&change_basis);
6525:     if (pcbddc->use_change_of_basis) {
6526:       for (i=0;i<sub_schurs->n_subs;i++) {
6527:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6528:           PetscBTSet(change_basis,i+n_vertices);
6529:         }
6530:       }
6531:     }
6532:   }
6533:   pcbddc->local_primal_size = total_counts;
6534:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6536:   /* map constraints_idxs in boundary numbering */
6537:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6538:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);

6540:   /* Create constraint matrix */
6541:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6542:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6543:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6545:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6546:   /* determine if a QR strategy is needed for change of basis */
6547:   qr_needed = pcbddc->use_qr_single;
6548:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6549:   total_primal_vertices=0;
6550:   pcbddc->local_primal_size_cc = 0;
6551:   for (i=0;i<total_counts_cc;i++) {
6552:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6553:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6554:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6555:       pcbddc->local_primal_size_cc += 1;
6556:     } else if (PetscBTLookup(change_basis,i)) {
6557:       for (k=0;k<constraints_n[i];k++) {
6558:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6559:       }
6560:       pcbddc->local_primal_size_cc += constraints_n[i];
6561:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6562:         PetscBTSet(qr_needed_idx,i);
6563:         qr_needed = PETSC_TRUE;
6564:       }
6565:     } else {
6566:       pcbddc->local_primal_size_cc += 1;
6567:     }
6568:   }
6569:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6570:   pcbddc->n_vertices = total_primal_vertices;
6571:   /* permute indices in order to have a sorted set of vertices */
6572:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6573:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6574:   PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6575:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6577:   /* nonzero structure of constraint matrix */
6578:   /* and get reference dof for local constraints */
6579:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6580:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6582:   j = total_primal_vertices;
6583:   total_counts = total_primal_vertices;
6584:   cum = total_primal_vertices;
6585:   for (i=n_vertices;i<total_counts_cc;i++) {
6586:     if (!PetscBTLookup(change_basis,i)) {
6587:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6588:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6589:       cum++;
6590:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6591:       for (k=0;k<constraints_n[i];k++) {
6592:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6593:         nnz[j+k] = size_of_constraint;
6594:       }
6595:       j += constraints_n[i];
6596:     }
6597:   }
6598:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6599:   MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6600:   PetscFree(nnz);

6602:   /* set values in constraint matrix */
6603:   for (i=0;i<total_primal_vertices;i++) {
6604:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6605:   }
6606:   total_counts = total_primal_vertices;
6607:   for (i=n_vertices;i<total_counts_cc;i++) {
6608:     if (!PetscBTLookup(change_basis,i)) {
6609:       PetscInt *cols;

6611:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6612:       cols = constraints_idxs+constraints_idxs_ptr[i];
6613:       for (k=0;k<constraints_n[i];k++) {
6614:         PetscInt    row = total_counts+k;
6615:         PetscScalar *vals;

6617:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6618:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6619:       }
6620:       total_counts += constraints_n[i];
6621:     }
6622:   }
6623:   /* assembling */
6624:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6625:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6626:   MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");

6628:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6629:   if (pcbddc->use_change_of_basis) {
6630:     /* dual and primal dofs on a single cc */
6631:     PetscInt     dual_dofs,primal_dofs;
6632:     /* working stuff for GEQRF */
6633:     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6634:     PetscBLASInt lqr_work;
6635:     /* working stuff for UNGQR */
6636:     PetscScalar  *gqr_work = NULL,lgqr_work_t=0.0;
6637:     PetscBLASInt lgqr_work;
6638:     /* working stuff for TRTRS */
6639:     PetscScalar  *trs_rhs = NULL;
6640:     PetscBLASInt Blas_NRHS;
6641:     /* pointers for values insertion into change of basis matrix */
6642:     PetscInt     *start_rows,*start_cols;
6643:     PetscScalar  *start_vals;
6644:     /* working stuff for values insertion */
6645:     PetscBT      is_primal;
6646:     PetscInt     *aux_primal_numbering_B;
6647:     /* matrix sizes */
6648:     PetscInt     global_size,local_size;
6649:     /* temporary change of basis */
6650:     Mat          localChangeOfBasisMatrix;
6651:     /* extra space for debugging */
6652:     PetscScalar  *dbg_work = NULL;

6654:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6655:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6656:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6657:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6658:     /* nonzeros for local mat */
6659:     PetscMalloc1(pcis->n,&nnz);
6660:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6661:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6662:     } else {
6663:       const PetscInt *ii;
6664:       PetscInt       n;
6665:       PetscBool      flg_row;
6666:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6667:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6668:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6669:     }
6670:     for (i=n_vertices;i<total_counts_cc;i++) {
6671:       if (PetscBTLookup(change_basis,i)) {
6672:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6673:         if (PetscBTLookup(qr_needed_idx,i)) {
6674:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6675:         } else {
6676:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6677:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6678:         }
6679:       }
6680:     }
6681:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6682:     MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6683:     PetscFree(nnz);
6684:     /* Set interior change in the matrix */
6685:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6686:       for (i=0;i<pcis->n;i++) {
6687:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6688:       }
6689:     } else {
6690:       const PetscInt *ii,*jj;
6691:       PetscScalar    *aa;
6692:       PetscInt       n;
6693:       PetscBool      flg_row;
6694:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6695:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6696:       for (i=0;i<n;i++) {
6697:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6698:       }
6699:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6700:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6701:     }

6703:     if (pcbddc->dbg_flag) {
6704:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6705:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6706:     }


6709:     /* Now we loop on the constraints which need a change of basis */
6710:     /*
6711:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6712:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6714:        Basic blocks of change of basis matrix T computed by

6716:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6718:             | 1        0   ...        0         s_1/S |
6719:             | 0        1   ...        0         s_2/S |
6720:             |              ...                        |
6721:             | 0        ...            1     s_{n-1}/S |
6722:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6724:             with S = \sum_{i=1}^n s_i^2
6725:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6726:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6728:           - QR decomposition of constraints otherwise
6729:     */
6730:     if (qr_needed && max_size_of_constraint) {
6731:       /* space to store Q */
6732:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6733:       /* array to store scaling factors for reflectors */
6734:       PetscMalloc1(max_constraints,&qr_tau);
6735:       /* first we issue queries for optimal work */
6736:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6737:       PetscBLASIntCast(max_constraints,&Blas_N);
6738:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6739:       lqr_work = -1;
6740:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6741:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6742:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6743:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6744:       lgqr_work = -1;
6745:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6746:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6747:       PetscBLASIntCast(max_constraints,&Blas_K);
6748:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6749:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6750:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6751:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6752:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6753:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6754:       /* array to store rhs and solution of triangular solver */
6755:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6756:       /* allocating workspace for check */
6757:       if (pcbddc->dbg_flag) {
6758:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6759:       }
6760:     }
6761:     /* array to store whether a node is primal or not */
6762:     PetscBTCreate(pcis->n_B,&is_primal);
6763:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6764:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6765:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6766:     for (i=0;i<total_primal_vertices;i++) {
6767:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6768:     }
6769:     PetscFree(aux_primal_numbering_B);

6771:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6772:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6773:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6774:       if (PetscBTLookup(change_basis,total_counts)) {
6775:         /* get constraint info */
6776:         primal_dofs = constraints_n[total_counts];
6777:         dual_dofs = size_of_constraint-primal_dofs;

6779:         if (pcbddc->dbg_flag) {
6780:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6781:         }

6783:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6785:           /* copy quadrature constraints for change of basis check */
6786:           if (pcbddc->dbg_flag) {
6787:             PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6788:           }
6789:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6790:           PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);

6792:           /* compute QR decomposition of constraints */
6793:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6794:           PetscBLASIntCast(primal_dofs,&Blas_N);
6795:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6796:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6797:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6798:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6799:           PetscFPTrapPop();

6801:           /* explicitly compute R^-T */
6802:           PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6803:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6804:           PetscBLASIntCast(primal_dofs,&Blas_N);
6805:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6806:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6807:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6808:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6809:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6810:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6811:           PetscFPTrapPop();

6813:           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6814:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6815:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6816:           PetscBLASIntCast(primal_dofs,&Blas_K);
6817:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6818:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6819:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6820:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6821:           PetscFPTrapPop();

6823:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6824:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6825:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6826:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6827:           PetscBLASIntCast(primal_dofs,&Blas_N);
6828:           PetscBLASIntCast(primal_dofs,&Blas_K);
6829:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6830:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6831:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6832:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6833:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6834:           PetscFPTrapPop();
6835:           PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);

6837:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6838:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6839:           /* insert cols for primal dofs */
6840:           for (j=0;j<primal_dofs;j++) {
6841:             start_vals = &qr_basis[j*size_of_constraint];
6842:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6843:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6844:           }
6845:           /* insert cols for dual dofs */
6846:           for (j=0,k=0;j<dual_dofs;k++) {
6847:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6848:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6849:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6850:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6851:               j++;
6852:             }
6853:           }

6855:           /* check change of basis */
6856:           if (pcbddc->dbg_flag) {
6857:             PetscInt   ii,jj;
6858:             PetscBool valid_qr=PETSC_TRUE;
6859:             PetscBLASIntCast(primal_dofs,&Blas_M);
6860:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6861:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6862:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6863:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6864:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6865:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6866:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6867:             PetscFPTrapPop();
6868:             for (jj=0;jj<size_of_constraint;jj++) {
6869:               for (ii=0;ii<primal_dofs;ii++) {
6870:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6871:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6872:               }
6873:             }
6874:             if (!valid_qr) {
6875:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6876:               for (jj=0;jj<size_of_constraint;jj++) {
6877:                 for (ii=0;ii<primal_dofs;ii++) {
6878:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6879:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6880:                   }
6881:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6882:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6883:                   }
6884:                 }
6885:               }
6886:             } else {
6887:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6888:             }
6889:           }
6890:         } else { /* simple transformation block */
6891:           PetscInt    row,col;
6892:           PetscScalar val,norm;

6894:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6895:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6896:           for (j=0;j<size_of_constraint;j++) {
6897:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6898:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6899:             if (!PetscBTLookup(is_primal,row_B)) {
6900:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6901:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6902:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6903:             } else {
6904:               for (k=0;k<size_of_constraint;k++) {
6905:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6906:                 if (row != col) {
6907:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6908:                 } else {
6909:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6910:                 }
6911:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6912:               }
6913:             }
6914:           }
6915:           if (pcbddc->dbg_flag) {
6916:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6917:           }
6918:         }
6919:       } else {
6920:         if (pcbddc->dbg_flag) {
6921:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6922:         }
6923:       }
6924:     }

6926:     /* free workspace */
6927:     if (qr_needed) {
6928:       if (pcbddc->dbg_flag) {
6929:         PetscFree(dbg_work);
6930:       }
6931:       PetscFree(trs_rhs);
6932:       PetscFree(qr_tau);
6933:       PetscFree(qr_work);
6934:       PetscFree(gqr_work);
6935:       PetscFree(qr_basis);
6936:     }
6937:     PetscBTDestroy(&is_primal);
6938:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6939:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6941:     /* assembling of global change of variable */
6942:     if (!pcbddc->fake_change) {
6943:       Mat      tmat;
6944:       PetscInt bs;

6946:       VecGetSize(pcis->vec1_global,&global_size);
6947:       VecGetLocalSize(pcis->vec1_global,&local_size);
6948:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6949:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6950:       MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6951:       MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6952:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6953:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6954:       MatGetBlockSize(pc->pmat,&bs);
6955:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6956:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6957:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6958:       MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6959:       MatDestroy(&tmat);
6960:       VecSet(pcis->vec1_global,0.0);
6961:       VecSet(pcis->vec1_N,1.0);
6962:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6963:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6964:       VecReciprocal(pcis->vec1_global);
6965:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6967:       /* check */
6968:       if (pcbddc->dbg_flag) {
6969:         PetscReal error;
6970:         Vec       x,x_change;

6972:         VecDuplicate(pcis->vec1_global,&x);
6973:         VecDuplicate(pcis->vec1_global,&x_change);
6974:         VecSetRandom(x,NULL);
6975:         VecCopy(x,pcis->vec1_global);
6976:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6977:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6978:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6979:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6980:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6981:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6982:         VecAXPY(x,-1.0,x_change);
6983:         VecNorm(x,NORM_INFINITY,&error);
6984:         if (error > PETSC_SMALL) {
6985:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6986:         }
6987:         VecDestroy(&x);
6988:         VecDestroy(&x_change);
6989:       }
6990:       /* adapt sub_schurs computed (if any) */
6991:       if (pcbddc->use_deluxe_scaling) {
6992:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6994:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6995:         if (sub_schurs && sub_schurs->S_Ej_all) {
6996:           Mat                    S_new,tmat;
6997:           IS                     is_all_N,is_V_Sall = NULL;

6999:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
7000:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
7001:           if (pcbddc->deluxe_zerorows) {
7002:             ISLocalToGlobalMapping NtoSall;
7003:             IS                     is_V;
7004:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
7005:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
7006:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
7007:             ISLocalToGlobalMappingDestroy(&NtoSall);
7008:             ISDestroy(&is_V);
7009:           }
7010:           ISDestroy(&is_all_N);
7011:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7012:           MatDestroy(&sub_schurs->S_Ej_all);
7013:           PetscObjectReference((PetscObject)S_new);
7014:           if (pcbddc->deluxe_zerorows) {
7015:             const PetscScalar *array;
7016:             const PetscInt    *idxs_V,*idxs_all;
7017:             PetscInt          i,n_V;

7019:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7020:             ISGetLocalSize(is_V_Sall,&n_V);
7021:             ISGetIndices(is_V_Sall,&idxs_V);
7022:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
7023:             VecGetArrayRead(pcis->D,&array);
7024:             for (i=0;i<n_V;i++) {
7025:               PetscScalar val;
7026:               PetscInt    idx;

7028:               idx = idxs_V[i];
7029:               val = array[idxs_all[idxs_V[i]]];
7030:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
7031:             }
7032:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
7033:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
7034:             VecRestoreArrayRead(pcis->D,&array);
7035:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
7036:             ISRestoreIndices(is_V_Sall,&idxs_V);
7037:           }
7038:           sub_schurs->S_Ej_all = S_new;
7039:           MatDestroy(&S_new);
7040:           if (sub_schurs->sum_S_Ej_all) {
7041:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7042:             MatDestroy(&sub_schurs->sum_S_Ej_all);
7043:             PetscObjectReference((PetscObject)S_new);
7044:             if (pcbddc->deluxe_zerorows) {
7045:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7046:             }
7047:             sub_schurs->sum_S_Ej_all = S_new;
7048:             MatDestroy(&S_new);
7049:           }
7050:           ISDestroy(&is_V_Sall);
7051:           MatDestroy(&tmat);
7052:         }
7053:         /* destroy any change of basis context in sub_schurs */
7054:         if (sub_schurs && sub_schurs->change) {
7055:           PetscInt i;

7057:           for (i=0;i<sub_schurs->n_subs;i++) {
7058:             KSPDestroy(&sub_schurs->change[i]);
7059:           }
7060:           PetscFree(sub_schurs->change);
7061:         }
7062:       }
7063:       if (pcbddc->switch_static) { /* need to save the local change */
7064:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7065:       } else {
7066:         MatDestroy(&localChangeOfBasisMatrix);
7067:       }
7068:       /* determine if any process has changed the pressures locally */
7069:       pcbddc->change_interior = pcbddc->benign_have_null;
7070:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7071:       MatDestroy(&pcbddc->ConstraintMatrix);
7072:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7073:       pcbddc->use_qr_single = qr_needed;
7074:     }
7075:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7076:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7077:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7078:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7079:     } else {
7080:       Mat benign_global = NULL;
7081:       if (pcbddc->benign_have_null) {
7082:         Mat M;

7084:         pcbddc->change_interior = PETSC_TRUE;
7085:         VecCopy(matis->counter,pcis->vec1_N);
7086:         VecReciprocal(pcis->vec1_N);
7087:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7088:         if (pcbddc->benign_change) {
7089:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7090:           MatDiagonalScale(M,pcis->vec1_N,NULL);
7091:         } else {
7092:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7093:           MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7094:         }
7095:         MatISSetLocalMat(benign_global,M);
7096:         MatDestroy(&M);
7097:         MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7098:         MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7099:       }
7100:       if (pcbddc->user_ChangeOfBasisMatrix) {
7101:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7102:         MatDestroy(&benign_global);
7103:       } else if (pcbddc->benign_have_null) {
7104:         pcbddc->ChangeOfBasisMatrix = benign_global;
7105:       }
7106:     }
7107:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7108:       IS             is_global;
7109:       const PetscInt *gidxs;

7111:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
7112:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7113:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7114:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7115:       ISDestroy(&is_global);
7116:     }
7117:   }
7118:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7119:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7120:   }

7122:   if (!pcbddc->fake_change) {
7123:     /* add pressure dofs to set of primal nodes for numbering purposes */
7124:     for (i=0;i<pcbddc->benign_n;i++) {
7125:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7126:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7127:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7128:       pcbddc->local_primal_size_cc++;
7129:       pcbddc->local_primal_size++;
7130:     }

7132:     /* check if a new primal space has been introduced (also take into account benign trick) */
7133:     pcbddc->new_primal_space_local = PETSC_TRUE;
7134:     if (olocal_primal_size == pcbddc->local_primal_size) {
7135:       PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7136:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7137:       if (!pcbddc->new_primal_space_local) {
7138:         PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7139:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7140:       }
7141:     }
7142:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7143:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7144:   }
7145:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

7147:   /* flush dbg viewer */
7148:   if (pcbddc->dbg_flag) {
7149:     PetscViewerFlush(pcbddc->dbg_viewer);
7150:   }

7152:   /* free workspace */
7153:   PetscBTDestroy(&qr_needed_idx);
7154:   PetscBTDestroy(&change_basis);
7155:   if (!pcbddc->adaptive_selection) {
7156:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7157:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7158:   } else {
7159:     PetscFree5(pcbddc->adaptive_constraints_n,
7160:                       pcbddc->adaptive_constraints_idxs_ptr,
7161:                       pcbddc->adaptive_constraints_data_ptr,
7162:                       pcbddc->adaptive_constraints_idxs,
7163:                       pcbddc->adaptive_constraints_data);
7164:     PetscFree(constraints_n);
7165:     PetscFree(constraints_idxs_B);
7166:   }
7167:   return(0);
7168: }

7170: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7171: {
7172:   ISLocalToGlobalMapping map;
7173:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7174:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
7175:   PetscInt               i,N;
7176:   PetscBool              rcsr = PETSC_FALSE;
7177:   PetscErrorCode         ierr;

7180:   if (pcbddc->recompute_topography) {
7181:     pcbddc->graphanalyzed = PETSC_FALSE;
7182:     /* Reset previously computed graph */
7183:     PCBDDCGraphReset(pcbddc->mat_graph);
7184:     /* Init local Graph struct */
7185:     MatGetSize(pc->pmat,&N,NULL);
7186:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7187:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

7189:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7190:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7191:     }
7192:     /* Check validity of the csr graph passed in by the user */
7193:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

7195:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7196:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7197:       PetscInt  *xadj,*adjncy;
7198:       PetscInt  nvtxs;
7199:       PetscBool flg_row=PETSC_FALSE;

7201:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7202:       if (flg_row) {
7203:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7204:         pcbddc->computed_rowadj = PETSC_TRUE;
7205:       }
7206:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7207:       rcsr = PETSC_TRUE;
7208:     }
7209:     if (pcbddc->dbg_flag) {
7210:       PetscViewerFlush(pcbddc->dbg_viewer);
7211:     }

7213:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7214:       PetscReal    *lcoords;
7215:       PetscInt     n;
7216:       MPI_Datatype dimrealtype;

7218:       /* TODO: support for blocked */
7219:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7220:       MatGetLocalSize(matis->A,&n,NULL);
7221:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7222:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7223:       MPI_Type_commit(&dimrealtype);
7224:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7225:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7226:       MPI_Type_free(&dimrealtype);
7227:       PetscFree(pcbddc->mat_graph->coords);

7229:       pcbddc->mat_graph->coords = lcoords;
7230:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7231:       pcbddc->mat_graph->cnloc  = n;
7232:     }
7233:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7234:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);

7236:     /* Setup of Graph */
7237:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7238:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

7240:     /* attach info on disconnected subdomains if present */
7241:     if (pcbddc->n_local_subs) {
7242:       PetscInt *local_subs,n,totn;

7244:       MatGetLocalSize(matis->A,&n,NULL);
7245:       PetscMalloc1(n,&local_subs);
7246:       for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7247:       for (i=0;i<pcbddc->n_local_subs;i++) {
7248:         const PetscInt *idxs;
7249:         PetscInt       nl,j;

7251:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
7252:         ISGetIndices(pcbddc->local_subs[i],&idxs);
7253:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7254:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7255:       }
7256:       for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7257:       pcbddc->mat_graph->n_local_subs = totn + 1;
7258:       pcbddc->mat_graph->local_subs = local_subs;
7259:     }
7260:   }

7262:   if (!pcbddc->graphanalyzed) {
7263:     /* Graph's connected components analysis */
7264:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7265:     pcbddc->graphanalyzed = PETSC_TRUE;
7266:     pcbddc->corner_selected = pcbddc->corner_selection;
7267:   }
7268:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7269:   return(0);
7270: }

7272: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7273: {
7274:   PetscInt       i,j,n;
7275:   PetscScalar    *alphas;
7276:   PetscReal      norm,*onorms;

7280:   n = *nio;
7281:   if (!n) return(0);
7282:   PetscMalloc2(n,&alphas,n,&onorms);
7283:   VecNormalize(vecs[0],&norm);
7284:   if (norm < PETSC_SMALL) {
7285:     onorms[0] = 0.0;
7286:     VecSet(vecs[0],0.0);
7287:   } else {
7288:     onorms[0] = norm;
7289:   }

7291:   for (i=1;i<n;i++) {
7292:     VecMDot(vecs[i],i,vecs,alphas);
7293:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7294:     VecMAXPY(vecs[i],i,alphas,vecs);
7295:     VecNormalize(vecs[i],&norm);
7296:     if (norm < PETSC_SMALL) {
7297:       onorms[i] = 0.0;
7298:       VecSet(vecs[i],0.0);
7299:     } else {
7300:       onorms[i] = norm;
7301:     }
7302:   }
7303:   /* push nonzero vectors at the beginning */
7304:   for (i=0;i<n;i++) {
7305:     if (onorms[i] == 0.0) {
7306:       for (j=i+1;j<n;j++) {
7307:         if (onorms[j] != 0.0) {
7308:           VecCopy(vecs[j],vecs[i]);
7309:           onorms[j] = 0.0;
7310:         }
7311:       }
7312:     }
7313:   }
7314:   for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7315:   PetscFree2(alphas,onorms);
7316:   return(0);
7317: }

7319: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7320: {
7321:   Mat            A;
7322:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
7323:   PetscMPIInt    size,rank,color;
7324:   PetscInt       *xadj,*adjncy;
7325:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7326:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
7327:   PetscInt       void_procs,*procs_candidates = NULL;
7328:   PetscInt       xadj_count,*count;
7329:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
7330:   PetscSubcomm   psubcomm;
7331:   MPI_Comm       subcomm;

7336:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7337:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7340:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);

7342:   if (have_void) *have_void = PETSC_FALSE;
7343:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7344:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7345:   MatISGetLocalMat(mat,&A);
7346:   MatGetLocalSize(A,&n,NULL);
7347:   im_active = !!n;
7348:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7349:   void_procs = size - active_procs;
7350:   /* get ranks of of non-active processes in mat communicator */
7351:   if (void_procs) {
7352:     PetscInt ncand;

7354:     if (have_void) *have_void = PETSC_TRUE;
7355:     PetscMalloc1(size,&procs_candidates);
7356:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7357:     for (i=0,ncand=0;i<size;i++) {
7358:       if (!procs_candidates[i]) {
7359:         procs_candidates[ncand++] = i;
7360:       }
7361:     }
7362:     /* force n_subdomains to be not greater that the number of non-active processes */
7363:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7364:   }

7366:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7367:      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7368:   MatGetSize(mat,&N,NULL);
7369:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7370:     PetscInt issize,isidx,dest;
7371:     if (*n_subdomains == 1) dest = 0;
7372:     else dest = rank;
7373:     if (im_active) {
7374:       issize = 1;
7375:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7376:         isidx = procs_candidates[dest];
7377:       } else {
7378:         isidx = dest;
7379:       }
7380:     } else {
7381:       issize = 0;
7382:       isidx = -1;
7383:     }
7384:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7385:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7386:     PetscFree(procs_candidates);
7387:     return(0);
7388:   }
7389:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7390:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7391:   threshold = PetscMax(threshold,2);

7393:   /* Get info on mapping */
7394:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7396:   /* build local CSR graph of subdomains' connectivity */
7397:   PetscMalloc1(2,&xadj);
7398:   xadj[0] = 0;
7399:   xadj[1] = PetscMax(n_neighs-1,0);
7400:   PetscMalloc1(xadj[1],&adjncy);
7401:   PetscMalloc1(xadj[1],&adjncy_wgt);
7402:   PetscCalloc1(n,&count);
7403:   for (i=1;i<n_neighs;i++)
7404:     for (j=0;j<n_shared[i];j++)
7405:       count[shared[i][j]] += 1;

7407:   xadj_count = 0;
7408:   for (i=1;i<n_neighs;i++) {
7409:     for (j=0;j<n_shared[i];j++) {
7410:       if (count[shared[i][j]] < threshold) {
7411:         adjncy[xadj_count] = neighs[i];
7412:         adjncy_wgt[xadj_count] = n_shared[i];
7413:         xadj_count++;
7414:         break;
7415:       }
7416:     }
7417:   }
7418:   xadj[1] = xadj_count;
7419:   PetscFree(count);
7420:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7421:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

7423:   PetscMalloc1(1,&ranks_send_to_idx);

7425:   /* Restrict work on active processes only */
7426:   PetscMPIIntCast(im_active,&color);
7427:   if (void_procs) {
7428:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7429:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7430:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7431:     subcomm = PetscSubcommChild(psubcomm);
7432:   } else {
7433:     psubcomm = NULL;
7434:     subcomm = PetscObjectComm((PetscObject)mat);
7435:   }

7437:   v_wgt = NULL;
7438:   if (!color) {
7439:     PetscFree(xadj);
7440:     PetscFree(adjncy);
7441:     PetscFree(adjncy_wgt);
7442:   } else {
7443:     Mat             subdomain_adj;
7444:     IS              new_ranks,new_ranks_contig;
7445:     MatPartitioning partitioner;
7446:     PetscInt        rstart=0,rend=0;
7447:     PetscInt        *is_indices,*oldranks;
7448:     PetscMPIInt     size;
7449:     PetscBool       aggregate;

7451:     MPI_Comm_size(subcomm,&size);
7452:     if (void_procs) {
7453:       PetscInt prank = rank;
7454:       PetscMalloc1(size,&oldranks);
7455:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7456:       for (i=0;i<xadj[1];i++) {
7457:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7458:       }
7459:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7460:     } else {
7461:       oldranks = NULL;
7462:     }
7463:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7464:     if (aggregate) { /* TODO: all this part could be made more efficient */
7465:       PetscInt    lrows,row,ncols,*cols;
7466:       PetscMPIInt nrank;
7467:       PetscScalar *vals;

7469:       MPI_Comm_rank(subcomm,&nrank);
7470:       lrows = 0;
7471:       if (nrank<redprocs) {
7472:         lrows = size/redprocs;
7473:         if (nrank<size%redprocs) lrows++;
7474:       }
7475:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7476:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7477:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7478:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7479:       row = nrank;
7480:       ncols = xadj[1]-xadj[0];
7481:       cols = adjncy;
7482:       PetscMalloc1(ncols,&vals);
7483:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7484:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7485:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7486:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7487:       PetscFree(xadj);
7488:       PetscFree(adjncy);
7489:       PetscFree(adjncy_wgt);
7490:       PetscFree(vals);
7491:       if (use_vwgt) {
7492:         Vec               v;
7493:         const PetscScalar *array;
7494:         PetscInt          nl;

7496:         MatCreateVecs(subdomain_adj,&v,NULL);
7497:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7498:         VecAssemblyBegin(v);
7499:         VecAssemblyEnd(v);
7500:         VecGetLocalSize(v,&nl);
7501:         VecGetArrayRead(v,&array);
7502:         PetscMalloc1(nl,&v_wgt);
7503:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7504:         VecRestoreArrayRead(v,&array);
7505:         VecDestroy(&v);
7506:       }
7507:     } else {
7508:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7509:       if (use_vwgt) {
7510:         PetscMalloc1(1,&v_wgt);
7511:         v_wgt[0] = n;
7512:       }
7513:     }
7514:     /* MatView(subdomain_adj,0); */

7516:     /* Partition */
7517:     MatPartitioningCreate(subcomm,&partitioner);
7518: #if defined(PETSC_HAVE_PTSCOTCH)
7519:     MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7520: #elif defined(PETSC_HAVE_PARMETIS)
7521:     MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7522: #else
7523:     MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7524: #endif
7525:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7526:     if (v_wgt) {
7527:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7528:     }
7529:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7530:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7531:     MatPartitioningSetFromOptions(partitioner);
7532:     MatPartitioningApply(partitioner,&new_ranks);
7533:     /* MatPartitioningView(partitioner,0); */

7535:     /* renumber new_ranks to avoid "holes" in new set of processors */
7536:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7537:     ISDestroy(&new_ranks);
7538:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7539:     if (!aggregate) {
7540:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7541:         if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7542:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7543:       } else if (oldranks) {
7544:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7545:       } else {
7546:         ranks_send_to_idx[0] = is_indices[0];
7547:       }
7548:     } else {
7549:       PetscInt    idx = 0;
7550:       PetscMPIInt tag;
7551:       MPI_Request *reqs;

7553:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7554:       PetscMalloc1(rend-rstart,&reqs);
7555:       for (i=rstart;i<rend;i++) {
7556:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7557:       }
7558:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7559:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7560:       PetscFree(reqs);
7561:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7562:         if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7563:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7564:       } else if (oldranks) {
7565:         ranks_send_to_idx[0] = oldranks[idx];
7566:       } else {
7567:         ranks_send_to_idx[0] = idx;
7568:       }
7569:     }
7570:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7571:     /* clean up */
7572:     PetscFree(oldranks);
7573:     ISDestroy(&new_ranks_contig);
7574:     MatDestroy(&subdomain_adj);
7575:     MatPartitioningDestroy(&partitioner);
7576:   }
7577:   PetscSubcommDestroy(&psubcomm);
7578:   PetscFree(procs_candidates);

7580:   /* assemble parallel IS for sends */
7581:   i = 1;
7582:   if (!color) i=0;
7583:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7584:   return(0);
7585: }

/* Private integer tags naming the storage format of a local matrix
   (dense, AIJ, BAIJ, SBAIJ); the numeric values are fixed. */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE   = 1,
  MATBAIJ_PRIVATE  = 2,
  MATSBAIJ_PRIVATE = 3
} MatTypePrivate;

7589: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7590: {
7591:   Mat                    local_mat;
7592:   IS                     is_sends_internal;
7593:   PetscInt               rows,cols,new_local_rows;
7594:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7595:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7596:   ISLocalToGlobalMapping l2gmap;
7597:   PetscInt*              l2gmap_indices;
7598:   const PetscInt*        is_indices;
7599:   MatType                new_local_type;
7600:   /* buffers */
7601:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7602:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7603:   PetscInt               *recv_buffer_idxs_local;
7604:   PetscScalar            *ptr_vals,*recv_buffer_vals;
7605:   const PetscScalar      *send_buffer_vals;
7606:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7607:   /* MPI */
7608:   MPI_Comm               comm,comm_n;
7609:   PetscSubcomm           subcomm;
7610:   PetscMPIInt            n_sends,n_recvs,size;
7611:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7612:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7613:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7614:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7615:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7616:   PetscErrorCode         ierr;

7620:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7621:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7628:   if (nvecs) {
7629:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7631:   }
7632:   /* further checks */
7633:   MatISGetLocalMat(mat,&local_mat);
7634:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7635:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7636:   MatGetSize(local_mat,&rows,&cols);
7637:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7638:   if (reuse && *mat_n) {
7639:     PetscInt mrows,mcols,mnrows,mncols;
7641:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7642:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7643:     MatGetSize(mat,&mrows,&mcols);
7644:     MatGetSize(*mat_n,&mnrows,&mncols);
7645:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7646:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7647:   }
7648:   MatGetBlockSize(local_mat,&bs);

7651:   /* prepare IS for sending if not provided */
7652:   if (!is_sends) {
7653:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7654:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7655:   } else {
7656:     PetscObjectReference((PetscObject)is_sends);
7657:     is_sends_internal = is_sends;
7658:   }

7660:   /* get comm */
7661:   PetscObjectGetComm((PetscObject)mat,&comm);

7663:   /* compute number of sends */
7664:   ISGetLocalSize(is_sends_internal,&i);
7665:   PetscMPIIntCast(i,&n_sends);

7667:   /* compute number of receives */
7668:   MPI_Comm_size(comm,&size);
7669:   PetscMalloc1(size,&iflags);
7670:   PetscArrayzero(iflags,size);
7671:   ISGetIndices(is_sends_internal,&is_indices);
7672:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7673:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7674:   PetscFree(iflags);

7676:   /* restrict comm if requested */
7677:   subcomm = NULL;
7678:   destroy_mat = PETSC_FALSE;
7679:   if (restrict_comm) {
7680:     PetscMPIInt color,subcommsize;

7682:     color = 0;
7683:     if (restrict_full) {
7684:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7685:     } else {
7686:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7687:     }
7688:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7689:     subcommsize = size - subcommsize;
7690:     /* check if reuse has been requested */
7691:     if (reuse) {
7692:       if (*mat_n) {
7693:         PetscMPIInt subcommsize2;
7694:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7695:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7696:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7697:       } else {
7698:         comm_n = PETSC_COMM_SELF;
7699:       }
7700:     } else { /* MAT_INITIAL_MATRIX */
7701:       PetscMPIInt rank;

7703:       MPI_Comm_rank(comm,&rank);
7704:       PetscSubcommCreate(comm,&subcomm);
7705:       PetscSubcommSetNumber(subcomm,2);
7706:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7707:       comm_n = PetscSubcommChild(subcomm);
7708:     }
7709:     /* flag to destroy *mat_n if not significative */
7710:     if (color) destroy_mat = PETSC_TRUE;
7711:   } else {
7712:     comm_n = comm;
7713:   }

7715:   /* prepare send/receive buffers */
7716:   PetscMalloc1(size,&ilengths_idxs);
7717:   PetscArrayzero(ilengths_idxs,size);
7718:   PetscMalloc1(size,&ilengths_vals);
7719:   PetscArrayzero(ilengths_vals,size);
7720:   if (nis) {
7721:     PetscCalloc1(size,&ilengths_idxs_is);
7722:   }

7724:   /* Get data from local matrices */
7725:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7726:     /* TODO: See below some guidelines on how to prepare the local buffers */
7727:     /*
7728:        send_buffer_vals should contain the raw values of the local matrix
7729:        send_buffer_idxs should contain:
7730:        - MatType_PRIVATE type
7731:        - PetscInt        size_of_l2gmap
7732:        - PetscInt        global_row_indices[size_of_l2gmap]
7733:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7734:     */
7735:   else {
7736:     MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7737:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7738:     PetscMalloc1(i+2,&send_buffer_idxs);
7739:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7740:     send_buffer_idxs[1] = i;
7741:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7742:     PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7743:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7744:     PetscMPIIntCast(i,&len);
7745:     for (i=0;i<n_sends;i++) {
7746:       ilengths_vals[is_indices[i]] = len*len;
7747:       ilengths_idxs[is_indices[i]] = len+2;
7748:     }
7749:   }
7750:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7751:   /* additional is (if any) */
7752:   if (nis) {
7753:     PetscMPIInt psum;
7754:     PetscInt j;
7755:     for (j=0,psum=0;j<nis;j++) {
7756:       PetscInt plen;
7757:       ISGetLocalSize(isarray[j],&plen);
7758:       PetscMPIIntCast(plen,&len);
7759:       psum += len+1; /* indices + lenght */
7760:     }
7761:     PetscMalloc1(psum,&send_buffer_idxs_is);
7762:     for (j=0,psum=0;j<nis;j++) {
7763:       PetscInt plen;
7764:       const PetscInt *is_array_idxs;
7765:       ISGetLocalSize(isarray[j],&plen);
7766:       send_buffer_idxs_is[psum] = plen;
7767:       ISGetIndices(isarray[j],&is_array_idxs);
7768:       PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7769:       ISRestoreIndices(isarray[j],&is_array_idxs);
7770:       psum += plen+1; /* indices + lenght */
7771:     }
7772:     for (i=0;i<n_sends;i++) {
7773:       ilengths_idxs_is[is_indices[i]] = psum;
7774:     }
7775:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7776:   }
7777:   MatISRestoreLocalMat(mat,&local_mat);

7779:   buf_size_idxs = 0;
7780:   buf_size_vals = 0;
7781:   buf_size_idxs_is = 0;
7782:   buf_size_vecs = 0;
7783:   for (i=0;i<n_recvs;i++) {
7784:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7785:     buf_size_vals += (PetscInt)olengths_vals[i];
7786:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7787:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7788:   }
7789:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7790:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7791:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7792:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7794:   /* get new tags for clean communications */
7795:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7796:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7797:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7798:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7800:   /* allocate for requests */
7801:   PetscMalloc1(n_sends,&send_req_idxs);
7802:   PetscMalloc1(n_sends,&send_req_vals);
7803:   PetscMalloc1(n_sends,&send_req_idxs_is);
7804:   PetscMalloc1(n_sends,&send_req_vecs);
7805:   PetscMalloc1(n_recvs,&recv_req_idxs);
7806:   PetscMalloc1(n_recvs,&recv_req_vals);
7807:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7808:   PetscMalloc1(n_recvs,&recv_req_vecs);

7810:   /* communications */
7811:   ptr_idxs = recv_buffer_idxs;
7812:   ptr_vals = recv_buffer_vals;
7813:   ptr_idxs_is = recv_buffer_idxs_is;
7814:   ptr_vecs = recv_buffer_vecs;
7815:   for (i=0;i<n_recvs;i++) {
7816:     source_dest = onodes[i];
7817:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7818:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7819:     ptr_idxs += olengths_idxs[i];
7820:     ptr_vals += olengths_vals[i];
7821:     if (nis) {
7822:       source_dest = onodes_is[i];
7823:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7824:       ptr_idxs_is += olengths_idxs_is[i];
7825:     }
7826:     if (nvecs) {
7827:       source_dest = onodes[i];
7828:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7829:       ptr_vecs += olengths_idxs[i]-2;
7830:     }
7831:   }
7832:   for (i=0;i<n_sends;i++) {
7833:     PetscMPIIntCast(is_indices[i],&source_dest);
7834:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7835:     MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7836:     if (nis) {
7837:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7838:     }
7839:     if (nvecs) {
7840:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7841:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7842:     }
7843:   }
7844:   ISRestoreIndices(is_sends_internal,&is_indices);
7845:   ISDestroy(&is_sends_internal);

7847:   /* assemble new l2g map */
7848:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7849:   ptr_idxs = recv_buffer_idxs;
7850:   new_local_rows = 0;
7851:   for (i=0;i<n_recvs;i++) {
7852:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7853:     ptr_idxs += olengths_idxs[i];
7854:   }
7855:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7856:   ptr_idxs = recv_buffer_idxs;
7857:   new_local_rows = 0;
7858:   for (i=0;i<n_recvs;i++) {
7859:     PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7860:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7861:     ptr_idxs += olengths_idxs[i];
7862:   }
7863:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7864:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7865:   PetscFree(l2gmap_indices);

7867:   /* infer new local matrix type from received local matrices type */
7868:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7869:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7870:   if (n_recvs) {
7871:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7872:     ptr_idxs = recv_buffer_idxs;
7873:     for (i=0;i<n_recvs;i++) {
7874:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7875:         new_local_type_private = MATAIJ_PRIVATE;
7876:         break;
7877:       }
7878:       ptr_idxs += olengths_idxs[i];
7879:     }
7880:     switch (new_local_type_private) {
7881:       case MATDENSE_PRIVATE:
7882:         new_local_type = MATSEQAIJ;
7883:         bs = 1;
7884:         break;
7885:       case MATAIJ_PRIVATE:
7886:         new_local_type = MATSEQAIJ;
7887:         bs = 1;
7888:         break;
7889:       case MATBAIJ_PRIVATE:
7890:         new_local_type = MATSEQBAIJ;
7891:         break;
7892:       case MATSBAIJ_PRIVATE:
7893:         new_local_type = MATSEQSBAIJ;
7894:         break;
7895:       default:
7896:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7897:     }
7898:   } else { /* by default, new_local_type is seqaij */
7899:     new_local_type = MATSEQAIJ;
7900:     bs = 1;
7901:   }

7903:   /* create MATIS object if needed */
7904:   if (!reuse) {
7905:     MatGetSize(mat,&rows,&cols);
7906:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7907:   } else {
7908:     /* it also destroys the local matrices */
7909:     if (*mat_n) {
7910:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7911:     } else { /* this is a fake object */
7912:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7913:     }
7914:   }
7915:   MatISGetLocalMat(*mat_n,&local_mat);
7916:   MatSetType(local_mat,new_local_type);

7918:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7920:   /* Global to local map of received indices */
7921:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7922:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7923:   ISLocalToGlobalMappingDestroy(&l2gmap);

7925:   /* restore attributes -> type of incoming data and its size */
7926:   buf_size_idxs = 0;
7927:   for (i=0;i<n_recvs;i++) {
7928:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7929:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7930:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7931:   }
7932:   PetscFree(recv_buffer_idxs);

7934:   /* set preallocation */
7935:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7936:   if (!newisdense) {
7937:     PetscInt *new_local_nnz=NULL;

7939:     ptr_idxs = recv_buffer_idxs_local;
7940:     if (n_recvs) {
7941:       PetscCalloc1(new_local_rows,&new_local_nnz);
7942:     }
7943:     for (i=0;i<n_recvs;i++) {
7944:       PetscInt j;
7945:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7946:         for (j=0;j<*(ptr_idxs+1);j++) {
7947:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7948:         }
7949:       } else {
7950:         /* TODO */
7951:       }
7952:       ptr_idxs += olengths_idxs[i];
7953:     }
7954:     if (new_local_nnz) {
7955:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7956:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7957:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7958:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7959:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7960:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7961:     } else {
7962:       MatSetUp(local_mat);
7963:     }
7964:     PetscFree(new_local_nnz);
7965:   } else {
7966:     MatSetUp(local_mat);
7967:   }

7969:   /* set values */
7970:   ptr_vals = recv_buffer_vals;
7971:   ptr_idxs = recv_buffer_idxs_local;
7972:   for (i=0;i<n_recvs;i++) {
7973:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7974:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7975:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7976:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7977:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7978:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7979:     } else {
7980:       /* TODO */
7981:     }
7982:     ptr_idxs += olengths_idxs[i];
7983:     ptr_vals += olengths_vals[i];
7984:   }
7985:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7986:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7987:   MatISRestoreLocalMat(*mat_n,&local_mat);
7988:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7989:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7990:   PetscFree(recv_buffer_vals);

7992: #if 0
7993:   if (!restrict_comm) { /* check */
7994:     Vec       lvec,rvec;
7995:     PetscReal infty_error;

7997:     MatCreateVecs(mat,&rvec,&lvec);
7998:     VecSetRandom(rvec,NULL);
7999:     MatMult(mat,rvec,lvec);
8000:     VecScale(lvec,-1.0);
8001:     MatMultAdd(*mat_n,rvec,lvec,lvec);
8002:     VecNorm(lvec,NORM_INFINITY,&infty_error);
8003:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
8004:     VecDestroy(&rvec);
8005:     VecDestroy(&lvec);
8006:   }
8007: #endif

8009:   /* assemble new additional is (if any) */
8010:   if (nis) {
8011:     PetscInt **temp_idxs,*count_is,j,psum;

8013:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
8014:     PetscCalloc1(nis,&count_is);
8015:     ptr_idxs = recv_buffer_idxs_is;
8016:     psum = 0;
8017:     for (i=0;i<n_recvs;i++) {
8018:       for (j=0;j<nis;j++) {
8019:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8020:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
8021:         psum += plen;
8022:         ptr_idxs += plen+1; /* shift pointer to received data */
8023:       }
8024:     }
8025:     PetscMalloc1(nis,&temp_idxs);
8026:     PetscMalloc1(psum,&temp_idxs[0]);
8027:     for (i=1;i<nis;i++) {
8028:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
8029:     }
8030:     PetscArrayzero(count_is,nis);
8031:     ptr_idxs = recv_buffer_idxs_is;
8032:     for (i=0;i<n_recvs;i++) {
8033:       for (j=0;j<nis;j++) {
8034:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8035:         PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
8036:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8037:         ptr_idxs += plen+1; /* shift pointer to received data */
8038:       }
8039:     }
8040:     for (i=0;i<nis;i++) {
8041:       ISDestroy(&isarray[i]);
8042:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
8043:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
8044:     }
8045:     PetscFree(count_is);
8046:     PetscFree(temp_idxs[0]);
8047:     PetscFree(temp_idxs);
8048:   }
8049:   /* free workspace */
8050:   PetscFree(recv_buffer_idxs_is);
8051:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
8052:   PetscFree(send_buffer_idxs);
8053:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
8054:   if (isdense) {
8055:     MatISGetLocalMat(mat,&local_mat);
8056:     MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
8057:     MatISRestoreLocalMat(mat,&local_mat);
8058:   } else {
8059:     /* PetscFree(send_buffer_vals); */
8060:   }
8061:   if (nis) {
8062:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
8063:     PetscFree(send_buffer_idxs_is);
8064:   }

8066:   if (nvecs) {
8067:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8068:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8069:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8070:     VecDestroy(&nnsp_vec[0]);
8071:     VecCreate(comm_n,&nnsp_vec[0]);
8072:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8073:     VecSetType(nnsp_vec[0],VECSTANDARD);
8074:     /* set values */
8075:     ptr_vals = recv_buffer_vecs;
8076:     ptr_idxs = recv_buffer_idxs_local;
8077:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8078:     for (i=0;i<n_recvs;i++) {
8079:       PetscInt j;
8080:       for (j=0;j<*(ptr_idxs+1);j++) {
8081:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8082:       }
8083:       ptr_idxs += olengths_idxs[i];
8084:       ptr_vals += olengths_idxs[i]-2;
8085:     }
8086:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8087:     VecAssemblyBegin(nnsp_vec[0]);
8088:     VecAssemblyEnd(nnsp_vec[0]);
8089:   }

8091:   PetscFree(recv_buffer_vecs);
8092:   PetscFree(recv_buffer_idxs_local);
8093:   PetscFree(recv_req_idxs);
8094:   PetscFree(recv_req_vals);
8095:   PetscFree(recv_req_vecs);
8096:   PetscFree(recv_req_idxs_is);
8097:   PetscFree(send_req_idxs);
8098:   PetscFree(send_req_vals);
8099:   PetscFree(send_req_vecs);
8100:   PetscFree(send_req_idxs_is);
8101:   PetscFree(ilengths_vals);
8102:   PetscFree(ilengths_idxs);
8103:   PetscFree(olengths_vals);
8104:   PetscFree(olengths_idxs);
8105:   PetscFree(onodes);
8106:   if (nis) {
8107:     PetscFree(ilengths_idxs_is);
8108:     PetscFree(olengths_idxs_is);
8109:     PetscFree(onodes_is);
8110:   }
8111:   PetscSubcommDestroy(&subcomm);
8112:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
8113:     MatDestroy(mat_n);
8114:     for (i=0;i<nis;i++) {
8115:       ISDestroy(&isarray[i]);
8116:     }
8117:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8118:       VecDestroy(&nnsp_vec[0]);
8119:     }
8120:     *mat_n = NULL;
8121:   }
8122:   return(0);
8123: }

8125: /* temporary hack: include the KSP private header to get access to the KSP private data structure */
8126: #include <petsc/private/kspimpl.h>

8128: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8129: {
8130:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
8131:   PC_IS                  *pcis = (PC_IS*)pc->data;
8132:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
8133:   Mat                    coarsedivudotp = NULL;
8134:   Mat                    coarseG,t_coarse_mat_is;
8135:   MatNullSpace           CoarseNullSpace = NULL;
8136:   ISLocalToGlobalMapping coarse_islg;
8137:   IS                     coarse_is,*isarray,corners;
8138:   PetscInt               i,im_active=-1,active_procs=-1;
8139:   PetscInt               nis,nisdofs,nisneu,nisvert;
8140:   PetscInt               coarse_eqs_per_proc;
8141:   PC                     pc_temp;
8142:   PCType                 coarse_pc_type;
8143:   KSPType                coarse_ksp_type;
8144:   PetscBool              multilevel_requested,multilevel_allowed;
8145:   PetscBool              coarse_reuse;
8146:   PetscInt               ncoarse,nedcfield;
8147:   PetscBool              compute_vecs = PETSC_FALSE;
8148:   PetscScalar            *array;
8149:   MatReuse               coarse_mat_reuse;
8150:   PetscBool              restr, full_restr, have_void;
8151:   PetscMPIInt            size;
8152:   PetscErrorCode         ierr;

8155:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8156:   /* Assign global numbering to coarse dofs */
8157:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8158:     PetscInt ocoarse_size;
8159:     compute_vecs = PETSC_TRUE;

8161:     pcbddc->new_primal_space = PETSC_TRUE;
8162:     ocoarse_size = pcbddc->coarse_size;
8163:     PetscFree(pcbddc->global_primal_indices);
8164:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8165:     /* see if we can avoid some work */
8166:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8167:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8168:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8169:         KSPReset(pcbddc->coarse_ksp);
8170:         coarse_reuse = PETSC_FALSE;
8171:       } else { /* we can safely reuse already computed coarse matrix */
8172:         coarse_reuse = PETSC_TRUE;
8173:       }
8174:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8175:       coarse_reuse = PETSC_FALSE;
8176:     }
8177:     /* reset any subassembling information */
8178:     if (!coarse_reuse || pcbddc->recompute_topography) {
8179:       ISDestroy(&pcbddc->coarse_subassembling);
8180:     }
8181:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8182:     coarse_reuse = PETSC_TRUE;
8183:   }
8184:   if (coarse_reuse && pcbddc->coarse_ksp) {
8185:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8186:     PetscObjectReference((PetscObject)coarse_mat);
8187:     coarse_mat_reuse = MAT_REUSE_MATRIX;
8188:   } else {
8189:     coarse_mat = NULL;
8190:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8191:   }

8193:   /* creates temporary l2gmap and IS for coarse indexes */
8194:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8195:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

8197:   /* creates temporary MATIS object for coarse matrix */
8198:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8199:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8200:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8201:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8202:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8203:   MatDestroy(&coarse_submat_dense);

8205:   /* count "active" (i.e. with positive local size) and "void" processes */
8206:   im_active = !!(pcis->n);
8207:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

8209:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8210:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8211:   /* full_restr : just use the receivers from the subassembling pattern */
8212:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8213:   coarse_mat_is        = NULL;
8214:   multilevel_allowed   = PETSC_FALSE;
8215:   multilevel_requested = PETSC_FALSE;
8216:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8217:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8218:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8219:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8220:   if (multilevel_requested) {
8221:     ncoarse    = active_procs/pcbddc->coarsening_ratio;
8222:     restr      = PETSC_FALSE;
8223:     full_restr = PETSC_FALSE;
8224:   } else {
8225:     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8226:     restr      = PETSC_TRUE;
8227:     full_restr = PETSC_TRUE;
8228:   }
8229:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8230:   ncoarse = PetscMax(1,ncoarse);
8231:   if (!pcbddc->coarse_subassembling) {
8232:     if (pcbddc->coarsening_ratio > 1) {
8233:       if (multilevel_requested) {
8234:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8235:       } else {
8236:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8237:       }
8238:     } else {
8239:       PetscMPIInt rank;

8241:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8242:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8243:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8244:     }
8245:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8246:     PetscInt    psum;
8247:     if (pcbddc->coarse_ksp) psum = 1;
8248:     else psum = 0;
8249:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8250:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8251:   }
8252:   /* determine if we can go multilevel */
8253:   if (multilevel_requested) {
8254:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8255:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8256:   }
8257:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

8259:   /* dump subassembling pattern */
8260:   if (pcbddc->dbg_flag && multilevel_allowed) {
8261:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8262:   }
8263:   /* compute dofs splitting and neumann boundaries for coarse dofs */
8264:   nedcfield = -1;
8265:   corners = NULL;
8266:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8267:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
8268:     const PetscInt         *idxs;
8269:     ISLocalToGlobalMapping tmap;

8271:     /* create map between primal indices (in local representative ordering) and local primal numbering */
8272:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8273:     /* allocate space for temporary storage */
8274:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8275:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8276:     /* allocate for IS array */
8277:     nisdofs = pcbddc->n_ISForDofsLocal;
8278:     if (pcbddc->nedclocal) {
8279:       if (pcbddc->nedfield > -1) {
8280:         nedcfield = pcbddc->nedfield;
8281:       } else {
8282:         nedcfield = 0;
8283:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8284:         nisdofs = 1;
8285:       }
8286:     }
8287:     nisneu = !!pcbddc->NeumannBoundariesLocal;
8288:     nisvert = 0; /* nisvert is not used */
8289:     nis = nisdofs + nisneu + nisvert;
8290:     PetscMalloc1(nis,&isarray);
8291:     /* dofs splitting */
8292:     for (i=0;i<nisdofs;i++) {
8293:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
8294:       if (nedcfield != i) {
8295:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8296:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8297:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8298:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8299:       } else {
8300:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8301:         ISGetIndices(pcbddc->nedclocal,&idxs);
8302:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8303:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8304:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8305:       }
8306:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8307:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8308:       /* ISView(isarray[i],0); */
8309:     }
8310:     /* neumann boundaries */
8311:     if (pcbddc->NeumannBoundariesLocal) {
8312:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8313:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8314:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8315:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8316:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8317:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8318:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8319:       /* ISView(isarray[nisdofs],0); */
8320:     }
8321:     /* coordinates */
8322:     if (pcbddc->corner_selected) {
8323:       PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8324:       ISGetLocalSize(corners,&tsize);
8325:       ISGetIndices(corners,&idxs);
8326:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8327:       if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8328:       ISRestoreIndices(corners,&idxs);
8329:       PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8330:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8331:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8332:     }
8333:     PetscFree(tidxs);
8334:     PetscFree(tidxs2);
8335:     ISLocalToGlobalMappingDestroy(&tmap);
8336:   } else {
8337:     nis = 0;
8338:     nisdofs = 0;
8339:     nisneu = 0;
8340:     nisvert = 0;
8341:     isarray = NULL;
8342:   }
8343:   /* destroy no longer needed map */
8344:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8346:   /* subassemble */
8347:   if (multilevel_allowed) {
8348:     Vec       vp[1];
8349:     PetscInt  nvecs = 0;
8350:     PetscBool reuse,reuser;

8352:     if (coarse_mat) reuse = PETSC_TRUE;
8353:     else reuse = PETSC_FALSE;
8354:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8355:     vp[0] = NULL;
8356:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8357:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8358:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8359:       VecSetType(vp[0],VECSTANDARD);
8360:       nvecs = 1;

8362:       if (pcbddc->divudotp) {
8363:         Mat      B,loc_divudotp;
8364:         Vec      v,p;
8365:         IS       dummy;
8366:         PetscInt np;

8368:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8369:         MatGetSize(loc_divudotp,&np,NULL);
8370:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8371:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8372:         MatCreateVecs(B,&v,&p);
8373:         VecSet(p,1.);
8374:         MatMultTranspose(B,p,v);
8375:         VecDestroy(&p);
8376:         MatDestroy(&B);
8377:         VecGetArray(vp[0],&array);
8378:         VecPlaceArray(pcbddc->vec1_P,array);
8379:         VecRestoreArray(vp[0],&array);
8380:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8381:         VecResetArray(pcbddc->vec1_P);
8382:         ISDestroy(&dummy);
8383:         VecDestroy(&v);
8384:       }
8385:     }
8386:     if (reuser) {
8387:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8388:     } else {
8389:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8390:     }
8391:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8392:       PetscScalar       *arraym;
8393:       const PetscScalar *arrayv;
8394:       PetscInt          nl;
8395:       VecGetLocalSize(vp[0],&nl);
8396:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8397:       MatDenseGetArray(coarsedivudotp,&arraym);
8398:       VecGetArrayRead(vp[0],&arrayv);
8399:       PetscArraycpy(arraym,arrayv,nl);
8400:       VecRestoreArrayRead(vp[0],&arrayv);
8401:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8402:       VecDestroy(&vp[0]);
8403:     } else {
8404:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8405:     }
8406:   } else {
8407:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8408:   }
8409:   if (coarse_mat_is || coarse_mat) {
8410:     if (!multilevel_allowed) {
8411:       MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8412:     } else {
8413:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8414:       if (coarse_mat_is) {
8415:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8416:         PetscObjectReference((PetscObject)coarse_mat_is);
8417:         coarse_mat = coarse_mat_is;
8418:       }
8419:     }
8420:   }
8421:   MatDestroy(&t_coarse_mat_is);
8422:   MatDestroy(&coarse_mat_is);

8424:   /* create local to global scatters for coarse problem */
8425:   if (compute_vecs) {
8426:     PetscInt lrows;
8427:     VecDestroy(&pcbddc->coarse_vec);
8428:     if (coarse_mat) {
8429:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8430:     } else {
8431:       lrows = 0;
8432:     }
8433:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8434:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8435:     VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8436:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8437:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8438:   }
8439:   ISDestroy(&coarse_is);

8441:   /* set defaults for coarse KSP and PC */
8442:   if (multilevel_allowed) {
8443:     coarse_ksp_type = KSPRICHARDSON;
8444:     coarse_pc_type  = PCBDDC;
8445:   } else {
8446:     coarse_ksp_type = KSPPREONLY;
8447:     coarse_pc_type  = PCREDUNDANT;
8448:   }

8450:   /* print some info if requested */
8451:   if (pcbddc->dbg_flag) {
8452:     if (!multilevel_allowed) {
8453:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8454:       if (multilevel_requested) {
8455:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8456:       } else if (pcbddc->max_levels) {
8457:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8458:       }
8459:       PetscViewerFlush(pcbddc->dbg_viewer);
8460:     }
8461:   }

8463:   /* communicate coarse discrete gradient */
8464:   coarseG = NULL;
8465:   if (pcbddc->nedcG && multilevel_allowed) {
8466:     MPI_Comm ccomm;
8467:     if (coarse_mat) {
8468:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8469:     } else {
8470:       ccomm = MPI_COMM_NULL;
8471:     }
8472:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8473:   }

8475:   /* create the coarse KSP object only once with defaults */
8476:   if (coarse_mat) {
8477:     PetscBool   isredundant,isbddc,force,valid;
8478:     PetscViewer dbg_viewer = NULL;

8480:     if (pcbddc->dbg_flag) {
8481:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8482:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8483:     }
8484:     if (!pcbddc->coarse_ksp) {
8485:       char   prefix[256],str_level[16];
8486:       size_t len;

8488:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8489:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8490:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8491:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8492:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8493:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8494:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8495:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8496:       /* TODO is this logic correct? should check for coarse_mat type */
8497:       PCSetType(pc_temp,coarse_pc_type);
8498:       /* prefix */
8499:       PetscStrcpy(prefix,"");
8500:       PetscStrcpy(str_level,"");
8501:       if (!pcbddc->current_level) {
8502:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8503:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8504:       } else {
8505:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8506:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8507:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8508:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8509:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8510:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8511:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8512:       }
8513:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8514:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8515:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8516:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8517:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8518:       /* allow user customization */
8519:       KSPSetFromOptions(pcbddc->coarse_ksp);
8520:       /* get some info after set from options */
8521:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8522:       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8523:       force = PETSC_FALSE;
8524:       PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8525:       PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8526:       PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8527:       if (multilevel_allowed && !force && !valid) {
8528:         isbddc = PETSC_TRUE;
8529:         PCSetType(pc_temp,PCBDDC);
8530:         PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8531:         PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8532:         PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8533:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8534:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8535:           (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8536:           PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8537:           PetscOptionsEnd();
8538:           pc_temp->setfromoptionscalled++;
8539:         }
8540:       }
8541:     }
8542:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8543:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8544:     if (nisdofs) {
8545:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8546:       for (i=0;i<nisdofs;i++) {
8547:         ISDestroy(&isarray[i]);
8548:       }
8549:     }
8550:     if (nisneu) {
8551:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8552:       ISDestroy(&isarray[nisdofs]);
8553:     }
8554:     if (nisvert) {
8555:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8556:       ISDestroy(&isarray[nis-1]);
8557:     }
8558:     if (coarseG) {
8559:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8560:     }

8562:     /* get some info after set from options */
8563:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);

8565:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8566:     if (isbddc && !multilevel_allowed) {
8567:       PCSetType(pc_temp,coarse_pc_type);
8568:     }
8569:     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8570:     force = PETSC_FALSE;
8571:     PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8572:     PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8573:     if (multilevel_requested && multilevel_allowed && !valid && !force) {
8574:       PCSetType(pc_temp,PCBDDC);
8575:     }
8576:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8577:     if (isredundant) {
8578:       KSP inner_ksp;
8579:       PC  inner_pc;

8581:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8582:       KSPGetPC(inner_ksp,&inner_pc);
8583:     }

8585:     /* parameters which miss an API */
8586:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8587:     if (isbddc) {
8588:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8590:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8591:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8592:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8593:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8594:       if (pcbddc_coarse->benign_saddle_point) {
8595:         Mat                    coarsedivudotp_is;
8596:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8597:         IS                     row,col;
8598:         const PetscInt         *gidxs;
8599:         PetscInt               n,st,M,N;

8601:         MatGetSize(coarsedivudotp,&n,NULL);
8602:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8603:         st   = st-n;
8604:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8605:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8606:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8607:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8608:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8609:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8610:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8611:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8612:         ISGetSize(row,&M);
8613:         MatGetSize(coarse_mat,&N,NULL);
8614:         ISDestroy(&row);
8615:         ISDestroy(&col);
8616:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8617:         MatSetType(coarsedivudotp_is,MATIS);
8618:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8619:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8620:         ISLocalToGlobalMappingDestroy(&rl2g);
8621:         ISLocalToGlobalMappingDestroy(&cl2g);
8622:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8623:         MatDestroy(&coarsedivudotp);
8624:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8625:         MatDestroy(&coarsedivudotp_is);
8626:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8627:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8628:       }
8629:     }

8631:     /* propagate symmetry info of coarse matrix */
8632:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8633:     if (pc->pmat->symmetric_set) {
8634:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8635:     }
8636:     if (pc->pmat->hermitian_set) {
8637:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8638:     }
8639:     if (pc->pmat->spd_set) {
8640:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8641:     }
8642:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8643:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8644:     }
8645:     /* set operators */
8646:     MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8647:     MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8648:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8649:     if (pcbddc->dbg_flag) {
8650:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8651:     }
8652:   }
8653:   MatDestroy(&coarseG);
8654:   PetscFree(isarray);
8655: #if 0
8656:   {
8657:     PetscViewer viewer;
8658:     char filename[256];
8659:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8660:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8661:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8662:     MatView(coarse_mat,viewer);
8663:     PetscViewerPopFormat(viewer);
8664:     PetscViewerDestroy(&viewer);
8665:   }
8666: #endif

8668:   if (corners) {
8669:     Vec            gv;
8670:     IS             is;
8671:     const PetscInt *idxs;
8672:     PetscInt       i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8673:     PetscScalar    *coords;

8675:     if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8676:     VecGetSize(pcbddc->coarse_vec,&N);
8677:     VecGetLocalSize(pcbddc->coarse_vec,&n);
8678:     VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8679:     VecSetBlockSize(gv,cdim);
8680:     VecSetSizes(gv,n*cdim,N*cdim);
8681:     VecSetType(gv,VECSTANDARD);
8682:     VecSetFromOptions(gv);
8683:     VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */

8685:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8686:     ISGetLocalSize(is,&n);
8687:     ISGetIndices(is,&idxs);
8688:     PetscMalloc1(n*cdim,&coords);
8689:     for (i=0;i<n;i++) {
8690:       for (d=0;d<cdim;d++) {
8691:         coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8692:       }
8693:     }
8694:     ISRestoreIndices(is,&idxs);
8695:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);

8697:     ISGetLocalSize(corners,&n);
8698:     ISGetIndices(corners,&idxs);
8699:     VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8700:     ISRestoreIndices(corners,&idxs);
8701:     PetscFree(coords);
8702:     VecAssemblyBegin(gv);
8703:     VecAssemblyEnd(gv);
8704:     VecGetArray(gv,&coords);
8705:     if (pcbddc->coarse_ksp) {
8706:       PC        coarse_pc;
8707:       PetscBool isbddc;

8709:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8710:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8711:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8712:         PetscReal *realcoords;

8714:         VecGetLocalSize(gv,&n);
8715: #if defined(PETSC_USE_COMPLEX)
8716:         PetscMalloc1(n,&realcoords);
8717:         for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8718: #else
8719:         realcoords = coords;
8720: #endif
8721:         PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8722: #if defined(PETSC_USE_COMPLEX)
8723:         PetscFree(realcoords);
8724: #endif
8725:       }
8726:     }
8727:     VecRestoreArray(gv,&coords);
8728:     VecDestroy(&gv);
8729:   }
8730:   ISDestroy(&corners);

8732:   if (pcbddc->coarse_ksp) {
8733:     Vec crhs,csol;

8735:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8736:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8737:     if (!csol) {
8738:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8739:     }
8740:     if (!crhs) {
8741:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8742:     }
8743:   }
8744:   MatDestroy(&coarsedivudotp);

8746:   /* compute null space for coarse solver if the benign trick has been requested */
8747:   if (pcbddc->benign_null) {

8749:     VecSet(pcbddc->vec1_P,0.);
8750:     for (i=0;i<pcbddc->benign_n;i++) {
8751:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8752:     }
8753:     VecAssemblyBegin(pcbddc->vec1_P);
8754:     VecAssemblyEnd(pcbddc->vec1_P);
8755:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8756:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8757:     if (coarse_mat) {
8758:       Vec         nullv;
8759:       PetscScalar *array,*array2;
8760:       PetscInt    nl;

8762:       MatCreateVecs(coarse_mat,&nullv,NULL);
8763:       VecGetLocalSize(nullv,&nl);
8764:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8765:       VecGetArray(nullv,&array2);
8766:       PetscArraycpy(array2,array,nl);
8767:       VecRestoreArray(nullv,&array2);
8768:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8769:       VecNormalize(nullv,NULL);
8770:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8771:       VecDestroy(&nullv);
8772:     }
8773:   }
8774:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);

8776:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8777:   if (pcbddc->coarse_ksp) {
8778:     PetscBool ispreonly;

8780:     if (CoarseNullSpace) {
8781:       PetscBool isnull;
8782:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8783:       if (isnull) {
8784:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8785:       }
8786:       /* TODO: add local nullspaces (if any) */
8787:     }
8788:     /* setup coarse ksp */
8789:     KSPSetUp(pcbddc->coarse_ksp);
8790:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8791:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8792:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8793:       KSP       check_ksp;
8794:       KSPType   check_ksp_type;
8795:       PC        check_pc;
8796:       Vec       check_vec,coarse_vec;
8797:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8798:       PetscInt  its;
8799:       PetscBool compute_eigs;
8800:       PetscReal *eigs_r,*eigs_c;
8801:       PetscInt  neigs;
8802:       const char *prefix;

8804:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8805:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8806:       PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8807:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8808:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8809:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8810:       /* prevent from setup unneeded object */
8811:       KSPGetPC(check_ksp,&check_pc);
8812:       PCSetType(check_pc,PCNONE);
8813:       if (ispreonly) {
8814:         check_ksp_type = KSPPREONLY;
8815:         compute_eigs = PETSC_FALSE;
8816:       } else {
8817:         check_ksp_type = KSPGMRES;
8818:         compute_eigs = PETSC_TRUE;
8819:       }
8820:       KSPSetType(check_ksp,check_ksp_type);
8821:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8822:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8823:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8824:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8825:       KSPSetOptionsPrefix(check_ksp,prefix);
8826:       KSPAppendOptionsPrefix(check_ksp,"check_");
8827:       KSPSetFromOptions(check_ksp);
8828:       KSPSetUp(check_ksp);
8829:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8830:       KSPSetPC(check_ksp,check_pc);
8831:       /* create random vec */
8832:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8833:       VecSetRandom(check_vec,NULL);
8834:       MatMult(coarse_mat,check_vec,coarse_vec);
8835:       /* solve coarse problem */
8836:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8837:       KSPCheckSolve(check_ksp,pc,coarse_vec);
8838:       /* set eigenvalue estimation if preonly has not been requested */
8839:       if (compute_eigs) {
8840:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8841:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8842:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8843:         if (neigs) {
8844:           lambda_max = eigs_r[neigs-1];
8845:           lambda_min = eigs_r[0];
8846:           if (pcbddc->use_coarse_estimates) {
8847:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8848:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8849:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8850:             }
8851:           }
8852:         }
8853:       }

8855:       /* check coarse problem residual error */
8856:       if (pcbddc->dbg_flag) {
8857:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8858:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8859:         VecAXPY(check_vec,-1.0,coarse_vec);
8860:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8861:         MatMult(coarse_mat,check_vec,coarse_vec);
8862:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8863:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8864:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8865:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8866:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8867:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8868:         if (CoarseNullSpace) {
8869:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8870:         }
8871:         if (compute_eigs) {
8872:           PetscReal          lambda_max_s,lambda_min_s;
8873:           KSPConvergedReason reason;
8874:           KSPGetType(check_ksp,&check_ksp_type);
8875:           KSPGetIterationNumber(check_ksp,&its);
8876:           KSPGetConvergedReason(check_ksp,&reason);
8877:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8878:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8879:           for (i=0;i<neigs;i++) {
8880:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8881:           }
8882:         }
8883:         PetscViewerFlush(dbg_viewer);
8884:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8885:       }
8886:       VecDestroy(&check_vec);
8887:       VecDestroy(&coarse_vec);
8888:       KSPDestroy(&check_ksp);
8889:       if (compute_eigs) {
8890:         PetscFree(eigs_r);
8891:         PetscFree(eigs_c);
8892:       }
8893:     }
8894:   }
8895:   MatNullSpaceDestroy(&CoarseNullSpace);
8896:   /* print additional info */
8897:   if (pcbddc->dbg_flag) {
8898:     /* waits until all processes reaches this point */
8899:     PetscBarrier((PetscObject)pc);
8900:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8901:     PetscViewerFlush(pcbddc->dbg_viewer);
8902:   }

8904:   /* free memory */
8905:   MatDestroy(&coarse_mat);
8906:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8907:   return(0);
8908: }

8910: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8911: {
8912:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8913:   PC_IS*         pcis = (PC_IS*)pc->data;
8914:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8915:   IS             subset,subset_mult,subset_n;
8916:   PetscInt       local_size,coarse_size=0;
8917:   PetscInt       *local_primal_indices=NULL;
8918:   const PetscInt *t_local_primal_indices;

8922:   /* Compute global number of coarse dofs */
8923:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8924:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8925:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8926:   ISDestroy(&subset_n);
8927:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8928:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8929:   ISDestroy(&subset);
8930:   ISDestroy(&subset_mult);
8931:   ISGetLocalSize(subset_n,&local_size);
8932:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8933:   PetscMalloc1(local_size,&local_primal_indices);
8934:   ISGetIndices(subset_n,&t_local_primal_indices);
8935:   PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8936:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8937:   ISDestroy(&subset_n);

8939:   /* check numbering */
8940:   if (pcbddc->dbg_flag) {
8941:     PetscScalar coarsesum,*array,*array2;
8942:     PetscInt    i;
8943:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8945:     PetscViewerFlush(pcbddc->dbg_viewer);
8946:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8947:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8948:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8949:     /* counter */
8950:     VecSet(pcis->vec1_global,0.0);
8951:     VecSet(pcis->vec1_N,1.0);
8952:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8953:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8954:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8955:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8956:     VecSet(pcis->vec1_N,0.0);
8957:     for (i=0;i<pcbddc->local_primal_size;i++) {
8958:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8959:     }
8960:     VecAssemblyBegin(pcis->vec1_N);
8961:     VecAssemblyEnd(pcis->vec1_N);
8962:     VecSet(pcis->vec1_global,0.0);
8963:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8964:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8965:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8966:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8967:     VecGetArray(pcis->vec1_N,&array);
8968:     VecGetArray(pcis->vec2_N,&array2);
8969:     for (i=0;i<pcis->n;i++) {
8970:       if (array[i] != 0.0 && array[i] != array2[i]) {
8971:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8972:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8973:         set_error = PETSC_TRUE;
8974:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8975:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8976:       }
8977:     }
8978:     VecRestoreArray(pcis->vec2_N,&array2);
8979:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8980:     PetscViewerFlush(pcbddc->dbg_viewer);
8981:     for (i=0;i<pcis->n;i++) {
8982:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8983:     }
8984:     VecRestoreArray(pcis->vec1_N,&array);
8985:     VecSet(pcis->vec1_global,0.0);
8986:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8987:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8988:     VecSum(pcis->vec1_global,&coarsesum);
8989:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8990:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8991:       PetscInt *gidxs;

8993:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8994:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8995:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8996:       PetscViewerFlush(pcbddc->dbg_viewer);
8997:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8998:       for (i=0;i<pcbddc->local_primal_size;i++) {
8999:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
9000:       }
9001:       PetscViewerFlush(pcbddc->dbg_viewer);
9002:       PetscFree(gidxs);
9003:     }
9004:     PetscViewerFlush(pcbddc->dbg_viewer);
9005:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9006:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
9007:   }

9009:   /* get back data */
9010:   *coarse_size_n = coarse_size;
9011:   *local_primal_indices_n = local_primal_indices;
9012:   return(0);
9013: }

9015: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
9016: {
9017:   IS             localis_t;
9018:   PetscInt       i,lsize,*idxs,n;
9019:   PetscScalar    *vals;

9023:   /* get indices in local ordering exploiting local to global map */
9024:   ISGetLocalSize(globalis,&lsize);
9025:   PetscMalloc1(lsize,&vals);
9026:   for (i=0;i<lsize;i++) vals[i] = 1.0;
9027:   ISGetIndices(globalis,(const PetscInt**)&idxs);
9028:   VecSet(gwork,0.0);
9029:   VecSet(lwork,0.0);
9030:   if (idxs) { /* multilevel guard */
9031:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
9032:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
9033:   }
9034:   VecAssemblyBegin(gwork);
9035:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
9036:   PetscFree(vals);
9037:   VecAssemblyEnd(gwork);
9038:   /* now compute set in local ordering */
9039:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9040:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9041:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
9042:   VecGetSize(lwork,&n);
9043:   for (i=0,lsize=0;i<n;i++) {
9044:     if (PetscRealPart(vals[i]) > 0.5) {
9045:       lsize++;
9046:     }
9047:   }
9048:   PetscMalloc1(lsize,&idxs);
9049:   for (i=0,lsize=0;i<n;i++) {
9050:     if (PetscRealPart(vals[i]) > 0.5) {
9051:       idxs[lsize++] = i;
9052:     }
9053:   }
9054:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
9055:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
9056:   *localis = localis_t;
9057:   return(0);
9058: }

9060: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9061: {
9062:   PC_IS               *pcis=(PC_IS*)pc->data;
9063:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9064:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
9065:   Mat                 S_j;
9066:   PetscInt            *used_xadj,*used_adjncy;
9067:   PetscBool           free_used_adj;
9068:   PetscErrorCode      ierr;

9071:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9072:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9073:   free_used_adj = PETSC_FALSE;
9074:   if (pcbddc->sub_schurs_layers == -1) {
9075:     used_xadj = NULL;
9076:     used_adjncy = NULL;
9077:   } else {
9078:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9079:       used_xadj = pcbddc->mat_graph->xadj;
9080:       used_adjncy = pcbddc->mat_graph->adjncy;
9081:     } else if (pcbddc->computed_rowadj) {
9082:       used_xadj = pcbddc->mat_graph->xadj;
9083:       used_adjncy = pcbddc->mat_graph->adjncy;
9084:     } else {
9085:       PetscBool      flg_row=PETSC_FALSE;
9086:       const PetscInt *xadj,*adjncy;
9087:       PetscInt       nvtxs;

9089:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9090:       if (flg_row) {
9091:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9092:         PetscArraycpy(used_xadj,xadj,nvtxs+1);
9093:         PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9094:         free_used_adj = PETSC_TRUE;
9095:       } else {
9096:         pcbddc->sub_schurs_layers = -1;
9097:         used_xadj = NULL;
9098:         used_adjncy = NULL;
9099:       }
9100:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9101:     }
9102:   }

9104:   /* setup sub_schurs data */
9105:   MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9106:   if (!sub_schurs->schur_explicit) {
9107:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9108:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9109:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9110:   } else {
9111:     Mat       change = NULL;
9112:     Vec       scaling = NULL;
9113:     IS        change_primal = NULL, iP;
9114:     PetscInt  benign_n;
9115:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9116:     PetscBool need_change = PETSC_FALSE;
9117:     PetscBool discrete_harmonic = PETSC_FALSE;

9119:     if (!pcbddc->use_vertices && reuse_solvers) {
9120:       PetscInt n_vertices;

9122:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9123:       reuse_solvers = (PetscBool)!n_vertices;
9124:     }
9125:     if (!pcbddc->benign_change_explicit) {
9126:       benign_n = pcbddc->benign_n;
9127:     } else {
9128:       benign_n = 0;
9129:     }
9130:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9131:        We need a global reduction to avoid possible deadlocks.
9132:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9133:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9134:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9135:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9136:       need_change = (PetscBool)(!need_change);
9137:     }
9138:     /* If the user defines additional constraints, we import them here.
9139:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9140:     if (need_change) {
9141:       PC_IS   *pcisf;
9142:       PC_BDDC *pcbddcf;
9143:       PC      pcf;

9145:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9146:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9147:       PCSetOperators(pcf,pc->mat,pc->pmat);
9148:       PCSetType(pcf,PCBDDC);

9150:       /* hacks */
9151:       pcisf                        = (PC_IS*)pcf->data;
9152:       pcisf->is_B_local            = pcis->is_B_local;
9153:       pcisf->vec1_N                = pcis->vec1_N;
9154:       pcisf->BtoNmap               = pcis->BtoNmap;
9155:       pcisf->n                     = pcis->n;
9156:       pcisf->n_B                   = pcis->n_B;
9157:       pcbddcf                      = (PC_BDDC*)pcf->data;
9158:       PetscFree(pcbddcf->mat_graph);
9159:       pcbddcf->mat_graph           = pcbddc->mat_graph;
9160:       pcbddcf->use_faces           = PETSC_TRUE;
9161:       pcbddcf->use_change_of_basis = PETSC_TRUE;
9162:       pcbddcf->use_change_on_faces = PETSC_TRUE;
9163:       pcbddcf->use_qr_single       = PETSC_TRUE;
9164:       pcbddcf->fake_change         = PETSC_TRUE;

9166:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9167:       PCBDDCConstraintsSetUp(pcf);
9168:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9169:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9170:       change = pcbddcf->ConstraintMatrix;
9171:       pcbddcf->ConstraintMatrix = NULL;

9173:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9174:       PetscFree(pcbddcf->sub_schurs);
9175:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9176:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9177:       PetscFree(pcbddcf->primal_indices_local_idxs);
9178:       PetscFree(pcbddcf->onearnullvecs_state);
9179:       PetscFree(pcf->data);
9180:       pcf->ops->destroy = NULL;
9181:       pcf->ops->reset   = NULL;
9182:       PCDestroy(&pcf);
9183:     }
9184:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

9186:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9187:     if (iP) {
9188:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9189:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9190:       PetscOptionsEnd();
9191:     }
9192:     if (discrete_harmonic) {
9193:       Mat A;
9194:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9195:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9196:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9197:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9198:       MatDestroy(&A);
9199:     } else {
9200:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9201:     }
9202:     MatDestroy(&change);
9203:     ISDestroy(&change_primal);
9204:   }
9205:   MatDestroy(&S_j);

9207:   /* free adjacency */
9208:   if (free_used_adj) {
9209:     PetscFree2(used_xadj,used_adjncy);
9210:   }
9211:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9212:   return(0);
9213: }

9215: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9216: {
9217:   PC_IS               *pcis=(PC_IS*)pc->data;
9218:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9219:   PCBDDCGraph         graph;
9220:   PetscErrorCode      ierr;

9223:   /* attach interface graph for determining subsets */
9224:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9225:     IS       verticesIS,verticescomm;
9226:     PetscInt vsize,*idxs;

9228:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9229:     ISGetSize(verticesIS,&vsize);
9230:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9231:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9232:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9233:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9234:     PCBDDCGraphCreate(&graph);
9235:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9236:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9237:     ISDestroy(&verticescomm);
9238:     PCBDDCGraphComputeConnectedComponents(graph);
9239:   } else {
9240:     graph = pcbddc->mat_graph;
9241:   }
9242:   /* print some info */
9243:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9244:     IS       vertices;
9245:     PetscInt nv,nedges,nfaces;
9246:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9247:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9248:     ISGetSize(vertices,&nv);
9249:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9250:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9251:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9252:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9253:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9254:     PetscViewerFlush(pcbddc->dbg_viewer);
9255:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9256:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9257:   }

9259:   /* sub_schurs init */
9260:   if (!pcbddc->sub_schurs) {
9261:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9262:   }
9263:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

9265:   /* free graph struct */
9266:   if (pcbddc->sub_schurs_rebuild) {
9267:     PCBDDCGraphDestroy(&graph);
9268:   }
9269:   return(0);
9270: }

9272: PetscErrorCode PCBDDCCheckOperator(PC pc)
9273: {
9274:   PC_IS               *pcis=(PC_IS*)pc->data;
9275:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9276:   PetscErrorCode      ierr;

9279:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9280:     IS             zerodiag = NULL;
9281:     Mat            S_j,B0_B=NULL;
9282:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
9283:     PetscScalar    *p0_check,*array,*array2;
9284:     PetscReal      norm;
9285:     PetscInt       i;

9287:     /* B0 and B0_B */
9288:     if (zerodiag) {
9289:       IS       dummy;

9291:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9292:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9293:       MatCreateVecs(B0_B,NULL,&dummy_vec);
9294:       ISDestroy(&dummy);
9295:     }
9296:     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9297:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9298:     VecSet(pcbddc->vec1_P,1.0);
9299:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9300:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9301:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9302:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9303:     VecReciprocal(vec_scale_P);
9304:     /* S_j */
9305:     MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9306:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

9308:     /* mimic vector in \widetilde{W}_\Gamma */
9309:     VecSetRandom(pcis->vec1_N,NULL);
9310:     /* continuous in primal space */
9311:     VecSetRandom(pcbddc->coarse_vec,NULL);
9312:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9313:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9314:     VecGetArray(pcbddc->vec1_P,&array);
9315:     PetscCalloc1(pcbddc->benign_n,&p0_check);
9316:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9317:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9318:     VecRestoreArray(pcbddc->vec1_P,&array);
9319:     VecAssemblyBegin(pcis->vec1_N);
9320:     VecAssemblyEnd(pcis->vec1_N);
9321:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9322:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9323:     VecDuplicate(pcis->vec2_B,&vec_check_B);
9324:     VecCopy(pcis->vec2_B,vec_check_B);

9326:     /* assemble rhs for coarse problem */
9327:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9328:     /* local with Schur */
9329:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9330:     if (zerodiag) {
9331:       VecGetArray(dummy_vec,&array);
9332:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9333:       VecRestoreArray(dummy_vec,&array);
9334:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9335:     }
9336:     /* sum on primal nodes the local contributions */
9337:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9338:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9339:     VecGetArray(pcis->vec1_N,&array);
9340:     VecGetArray(pcbddc->vec1_P,&array2);
9341:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9342:     VecRestoreArray(pcbddc->vec1_P,&array2);
9343:     VecRestoreArray(pcis->vec1_N,&array);
9344:     VecSet(pcbddc->coarse_vec,0.);
9345:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9346:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9347:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9348:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9349:     VecGetArray(pcbddc->vec1_P,&array);
9350:     /* scale primal nodes (BDDC sums contibutions) */
9351:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9352:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9353:     VecRestoreArray(pcbddc->vec1_P,&array);
9354:     VecAssemblyBegin(pcis->vec1_N);
9355:     VecAssemblyEnd(pcis->vec1_N);
9356:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9357:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9358:     /* global: \widetilde{B0}_B w_\Gamma */
9359:     if (zerodiag) {
9360:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
9361:       VecGetArray(dummy_vec,&array);
9362:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9363:       VecRestoreArray(dummy_vec,&array);
9364:     }
9365:     /* BDDC */
9366:     VecSet(pcis->vec1_D,0.);
9367:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

9369:     VecCopy(pcis->vec1_B,pcis->vec2_B);
9370:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9371:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9372:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9373:     for (i=0;i<pcbddc->benign_n;i++) {
9374:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9375:     }
9376:     PetscFree(p0_check);
9377:     VecDestroy(&vec_scale_P);
9378:     VecDestroy(&vec_check_B);
9379:     VecDestroy(&dummy_vec);
9380:     MatDestroy(&S_j);
9381:     MatDestroy(&B0_B);
9382:   }
9383:   return(0);
9384: }

9386: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9387: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9388: {
9389:   Mat            At;
9390:   IS             rows;
9391:   PetscInt       rst,ren;
9393:   PetscLayout    rmap;

9396:   rst = ren = 0;
9397:   if (ccomm != MPI_COMM_NULL) {
9398:     PetscLayoutCreate(ccomm,&rmap);
9399:     PetscLayoutSetSize(rmap,A->rmap->N);
9400:     PetscLayoutSetBlockSize(rmap,1);
9401:     PetscLayoutSetUp(rmap);
9402:     PetscLayoutGetRange(rmap,&rst,&ren);
9403:   }
9404:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9405:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9406:   ISDestroy(&rows);

9408:   if (ccomm != MPI_COMM_NULL) {
9409:     Mat_MPIAIJ *a,*b;
9410:     IS         from,to;
9411:     Vec        gvec;
9412:     PetscInt   lsize;

9414:     MatCreate(ccomm,B);
9415:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9416:     MatSetType(*B,MATAIJ);
9417:     PetscLayoutDestroy(&((*B)->rmap));
9418:     PetscLayoutSetUp((*B)->cmap);
9419:     a    = (Mat_MPIAIJ*)At->data;
9420:     b    = (Mat_MPIAIJ*)(*B)->data;
9421:     MPI_Comm_size(ccomm,&b->size);
9422:     MPI_Comm_rank(ccomm,&b->rank);
9423:     PetscObjectReference((PetscObject)a->A);
9424:     PetscObjectReference((PetscObject)a->B);
9425:     b->A = a->A;
9426:     b->B = a->B;

9428:     b->donotstash      = a->donotstash;
9429:     b->roworiented     = a->roworiented;
9430:     b->rowindices      = NULL;
9431:     b->rowvalues       = NULL;
9432:     b->getrowactive    = PETSC_FALSE;

9434:     (*B)->rmap         = rmap;
9435:     (*B)->factortype   = A->factortype;
9436:     (*B)->assembled    = PETSC_TRUE;
9437:     (*B)->insertmode   = NOT_SET_VALUES;
9438:     (*B)->preallocated = PETSC_TRUE;

9440:     if (a->colmap) {
9441: #if defined(PETSC_USE_CTABLE)
9442:       PetscTableCreateCopy(a->colmap,&b->colmap);
9443: #else
9444:       PetscMalloc1(At->cmap->N,&b->colmap);
9445:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9446:       PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9447: #endif
9448:     } else b->colmap = NULL;
9449:     if (a->garray) {
9450:       PetscInt len;
9451:       len  = a->B->cmap->n;
9452:       PetscMalloc1(len+1,&b->garray);
9453:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9454:       if (len) { PetscArraycpy(b->garray,a->garray,len); }
9455:     } else b->garray = NULL;

9457:     PetscObjectReference((PetscObject)a->lvec);
9458:     b->lvec = a->lvec;
9459:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9461:     /* cannot use VecScatterCopy */
9462:     VecGetLocalSize(b->lvec,&lsize);
9463:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9464:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9465:     MatCreateVecs(*B,&gvec,NULL);
9466:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9467:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9468:     ISDestroy(&from);
9469:     ISDestroy(&to);
9470:     VecDestroy(&gvec);
9471:   }
9472:   MatDestroy(&At);
9473:   return(0);
9474: }