Actual source code: bddcschurs.c
petsc-3.15.0 2021-03-30
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
11: /* if v2 is not present, correction is done in-place */
12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13: {
14: PetscScalar *array;
15: PetscScalar *array2;
19: if (!ctx->benign_n) return(0);
20: if (sol && full) {
21: PetscInt n_I,size_schur;
23: /* get sizes */
24: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
25: VecGetSize(v,&n_I);
26: n_I = n_I - size_schur;
27: /* get schur sol from array */
28: VecGetArray(v,&array);
29: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
30: VecRestoreArray(v,&array);
31: /* apply interior sol correction */
32: MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work);
33: VecResetArray(ctx->benign_dummy_schur_vec);
34: MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v);
35: }
36: if (v2) {
37: PetscInt nl;
39: VecGetArrayRead(v,(const PetscScalar**)&array);
40: VecGetLocalSize(v2,&nl);
41: VecGetArray(v2,&array2);
42: PetscArraycpy(array2,array,nl);
43: } else {
44: VecGetArray(v,&array);
45: array2 = array;
46: }
47: if (!sol) { /* change rhs */
48: PetscInt n;
49: for (n=0;n<ctx->benign_n;n++) {
50: PetscScalar sum = 0.;
51: const PetscInt *cols;
52: PetscInt nz,i;
54: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
55: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
56: for (i=0;i<nz-1;i++) sum += array[cols[i]];
57: #if defined(PETSC_USE_COMPLEX)
58: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
59: #else
60: sum = -sum/nz;
61: #endif
62: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
63: ctx->benign_save_vals[n] = array2[cols[nz-1]];
64: array2[cols[nz-1]] = sum;
65: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
66: }
67: } else {
68: PetscInt n;
69: for (n=0;n<ctx->benign_n;n++) {
70: PetscScalar sum = 0.;
71: const PetscInt *cols;
72: PetscInt nz,i;
73: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
74: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
75: for (i=0;i<nz-1;i++) sum += array[cols[i]];
76: #if defined(PETSC_USE_COMPLEX)
77: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
78: #else
79: sum = -sum/nz;
80: #endif
81: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
82: array2[cols[nz-1]] = ctx->benign_save_vals[n];
83: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
84: }
85: }
86: if (v2) {
87: VecRestoreArrayRead(v,(const PetscScalar**)&array);
88: VecRestoreArray(v2,&array2);
89: } else {
90: VecRestoreArray(v,&array);
91: }
92: if (!sol && full) {
93: Vec usedv;
94: PetscInt n_I,size_schur;
96: /* get sizes */
97: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
98: VecGetSize(v,&n_I);
99: n_I = n_I - size_schur;
100: /* compute schur rhs correction */
101: if (v2) {
102: usedv = v2;
103: } else {
104: usedv = v;
105: }
106: /* apply schur rhs correction */
107: MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work);
108: VecGetArrayRead(usedv,(const PetscScalar**)&array);
109: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
110: VecRestoreArrayRead(usedv,(const PetscScalar**)&array);
111: MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec);
112: VecResetArray(ctx->benign_dummy_schur_vec);
113: }
114: return(0);
115: }
117: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
118: {
119: PCBDDCReuseSolvers ctx;
120: PetscBool copy = PETSC_FALSE;
121: PetscErrorCode ierr;
124: PCShellGetContext(pc,(void **)&ctx);
125: if (full) {
126: #if defined(PETSC_HAVE_MUMPS)
127: MatMumpsSetIcntl(ctx->F,26,-1);
128: #endif
129: #if defined(PETSC_HAVE_MKL_PARDISO)
130: MatMkl_PardisoSetCntl(ctx->F,70,0);
131: #endif
132: copy = ctx->has_vertices;
133: } else { /* interior solver */
134: #if defined(PETSC_HAVE_MUMPS)
135: MatMumpsSetIcntl(ctx->F,26,0);
136: #endif
137: #if defined(PETSC_HAVE_MKL_PARDISO)
138: MatMkl_PardisoSetCntl(ctx->F,70,1);
139: #endif
140: copy = PETSC_TRUE;
141: }
142: /* copy rhs into factored matrix workspace */
143: if (copy) {
144: PetscInt n;
145: PetscScalar *array,*array_solver;
147: VecGetLocalSize(rhs,&n);
148: VecGetArrayRead(rhs,(const PetscScalar**)&array);
149: VecGetArray(ctx->rhs,&array_solver);
150: PetscArraycpy(array_solver,array,n);
151: VecRestoreArray(ctx->rhs,&array_solver);
152: VecRestoreArrayRead(rhs,(const PetscScalar**)&array);
154: PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full);
155: if (transpose) {
156: MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
157: } else {
158: MatSolve(ctx->F,ctx->rhs,ctx->sol);
159: }
160: PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full);
162: /* get back data to caller worskpace */
163: VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
164: VecGetArray(sol,&array);
165: PetscArraycpy(array,array_solver,n);
166: VecRestoreArray(sol,&array);
167: VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
168: } else {
169: if (ctx->benign_n) {
170: PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full);
171: if (transpose) {
172: MatSolveTranspose(ctx->F,ctx->rhs,sol);
173: } else {
174: MatSolve(ctx->F,ctx->rhs,sol);
175: }
176: PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full);
177: } else {
178: if (transpose) {
179: MatSolveTranspose(ctx->F,rhs,sol);
180: } else {
181: MatSolve(ctx->F,rhs,sol);
182: }
183: }
184: }
185: /* restore defaults */
186: #if defined(PETSC_HAVE_MUMPS)
187: MatMumpsSetIcntl(ctx->F,26,-1);
188: #endif
189: #if defined(PETSC_HAVE_MKL_PARDISO)
190: MatMkl_PardisoSetCntl(ctx->F,70,0);
191: #endif
192: return(0);
193: }
195: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
196: {
197: PetscErrorCode ierr;
200: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE);
201: return(0);
202: }
204: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
205: {
206: PetscErrorCode ierr;
209: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE);
210: return(0);
211: }
213: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
214: {
215: PetscErrorCode ierr;
218: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE);
219: return(0);
220: }
222: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
223: {
224: PetscErrorCode ierr;
227: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE);
228: return(0);
229: }
231: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
232: {
233: PCBDDCReuseSolvers ctx;
234: PetscBool iascii;
235: PetscErrorCode ierr;
238: PCShellGetContext(pc,(void **)&ctx);
239: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
240: if (iascii) {
241: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);
242: }
243: MatView(ctx->F,viewer);
244: if (iascii) {
245: PetscViewerPopFormat(viewer);
246: }
247: return(0);
248: }
250: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
251: {
252: PetscInt i;
256: MatDestroy(&reuse->F);
257: VecDestroy(&reuse->sol);
258: VecDestroy(&reuse->rhs);
259: PCDestroy(&reuse->interior_solver);
260: PCDestroy(&reuse->correction_solver);
261: ISDestroy(&reuse->is_R);
262: ISDestroy(&reuse->is_B);
263: VecScatterDestroy(&reuse->correction_scatter_B);
264: VecDestroy(&reuse->sol_B);
265: VecDestroy(&reuse->rhs_B);
266: for (i=0;i<reuse->benign_n;i++) {
267: ISDestroy(&reuse->benign_zerodiag_subs[i]);
268: }
269: PetscFree(reuse->benign_zerodiag_subs);
270: PetscFree(reuse->benign_save_vals);
271: MatDestroy(&reuse->benign_csAIB);
272: MatDestroy(&reuse->benign_AIIm1ones);
273: VecDestroy(&reuse->benign_corr_work);
274: VecDestroy(&reuse->benign_dummy_schur_vec);
275: return(0);
276: }
278: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
279: {
280: Mat B, C, D, Bd, Cd, AinvBd;
281: KSP ksp;
282: PC pc;
283: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
284: PetscReal fill = 2.0;
285: PetscInt n_I;
286: PetscMPIInt size;
290: MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
291: if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
292: if (reuse == MAT_REUSE_MATRIX) {
293: PetscBool Sdense;
295: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
296: if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
297: }
298: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
299: MatSchurComplementGetKSP(M, &ksp);
300: KSPGetPC(ksp, &pc);
301: PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
302: PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
303: PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
304: PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
305: PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
306: MatGetSize(B,&n_I,NULL);
307: if (n_I) {
308: if (!Bdense) {
309: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
310: } else {
311: Bd = B;
312: }
314: if (isLU || isILU || isCHOL) {
315: Mat fact;
316: KSPSetUp(ksp);
317: PCFactorGetMatrix(pc, &fact);
318: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
319: MatMatSolve(fact, Bd, AinvBd);
320: } else {
321: PetscBool ex = PETSC_TRUE;
323: if (ex) {
324: Mat Ainvd;
326: PCComputeOperator(pc, MATDENSE, &Ainvd);
327: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
328: MatDestroy(&Ainvd);
329: } else {
330: Vec sol,rhs;
331: PetscScalar *arrayrhs,*arraysol;
332: PetscInt i,nrhs,n;
334: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
335: MatGetSize(Bd,&n,&nrhs);
336: MatDenseGetArray(Bd,&arrayrhs);
337: MatDenseGetArray(AinvBd,&arraysol);
338: KSPGetSolution(ksp,&sol);
339: KSPGetRhs(ksp,&rhs);
340: for (i=0;i<nrhs;i++) {
341: VecPlaceArray(rhs,arrayrhs+i*n);
342: VecPlaceArray(sol,arraysol+i*n);
343: KSPSolve(ksp,rhs,sol);
344: VecResetArray(rhs);
345: VecResetArray(sol);
346: }
347: MatDenseRestoreArray(Bd,&arrayrhs);
348: MatDenseRestoreArray(AinvBd,&arrayrhs);
349: }
350: }
351: if (!Bdense & !issym) {
352: MatDestroy(&Bd);
353: }
355: if (!issym) {
356: if (!Cdense) {
357: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
358: } else {
359: Cd = C;
360: }
361: MatMatMult(Cd, AinvBd, reuse, fill, S);
362: if (!Cdense) {
363: MatDestroy(&Cd);
364: }
365: } else {
366: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
367: if (!Bdense) {
368: MatDestroy(&Bd);
369: }
370: }
371: MatDestroy(&AinvBd);
372: }
374: if (D) {
375: Mat Dd;
376: PetscBool Ddense;
378: PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
379: if (!Ddense) {
380: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
381: } else {
382: Dd = D;
383: }
384: if (n_I) {
385: MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
386: } else {
387: if (reuse == MAT_INITIAL_MATRIX) {
388: MatDuplicate(Dd,MAT_COPY_VALUES,S);
389: } else {
390: MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
391: }
392: }
393: if (!Ddense) {
394: MatDestroy(&Dd);
395: }
396: } else {
397: MatScale(*S,-1.0);
398: }
399: return(0);
400: }
402: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
403: {
404: Mat F,A_II,A_IB,A_BI,A_BB,AE_II;
405: Mat S_all;
406: Vec gstash,lstash;
407: VecScatter sstash;
408: IS is_I,is_I_layer;
409: IS all_subsets,all_subsets_mult,all_subsets_n;
410: PetscScalar *stasharray,*Bwork;
411: PetscInt *nnz,*all_local_idx_N;
412: PetscInt *auxnum1,*auxnum2;
413: PetscInt i,subset_size,max_subset_size;
414: PetscInt n_B,extra,local_size,global_size;
415: PetscInt local_stash_size;
416: PetscBLASInt B_N,B_ierr,B_lwork,*pivots;
417: MPI_Comm comm_n;
418: PetscBool deluxe = PETSC_TRUE;
419: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
420: PetscViewer matl_dbg_viewer = NULL;
421: PetscErrorCode ierr;
422: PetscBool flg;
425: MatDestroy(&sub_schurs->A);
426: MatDestroy(&sub_schurs->S);
427: if (Ain) {
428: PetscObjectReference((PetscObject)Ain);
429: sub_schurs->A = Ain;
430: }
432: PetscObjectReference((PetscObject)Sin);
433: sub_schurs->S = Sin;
434: if (sub_schurs->schur_explicit) {
435: sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
436: }
438: /* preliminary checks */
439: if (!sub_schurs->schur_explicit && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
441: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
443: /* debug (MATLAB) */
444: if (sub_schurs->debug) {
445: PetscMPIInt size,rank;
446: PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
448: MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size);
449: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
450: nr = size;
451: PetscMalloc1(nr,&print_schurs_ranks);
452: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
453: PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg);
454: if (!flg) print_schurs = PETSC_TRUE;
455: else {
456: print_schurs = PETSC_FALSE;
457: for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
458: }
459: PetscOptionsEnd();
460: PetscFree(print_schurs_ranks);
461: if (print_schurs) {
462: char filename[256];
464: PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank);
465: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer);
466: PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB);
467: }
468: }
471: /* restrict work on active processes */
472: if (sub_schurs->restrict_comm) {
473: PetscSubcomm subcomm;
474: PetscMPIInt color,rank;
476: color = 0;
477: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
478: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
479: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
480: PetscSubcommSetNumber(subcomm,2);
481: PetscSubcommSetTypeGeneral(subcomm,color,rank);
482: PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
483: PetscSubcommDestroy(&subcomm);
484: if (!sub_schurs->n_subs) {
485: PetscCommDestroy(&comm_n);
486: return(0);
487: }
488: } else {
489: PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL);
490: }
492: /* get Schur complement matrices */
493: if (!sub_schurs->schur_explicit) {
494: Mat tA_IB,tA_BI,tA_BB;
495: PetscBool isseqsbaij;
496: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
497: PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
498: if (isseqsbaij) {
499: MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
500: MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
501: MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
502: } else {
503: PetscObjectReference((PetscObject)tA_BB);
504: A_BB = tA_BB;
505: PetscObjectReference((PetscObject)tA_IB);
506: A_IB = tA_IB;
507: PetscObjectReference((PetscObject)tA_BI);
508: A_BI = tA_BI;
509: }
510: } else {
511: A_II = NULL;
512: A_IB = NULL;
513: A_BI = NULL;
514: A_BB = NULL;
515: }
516: S_all = NULL;
518: /* determine interior problems */
519: ISGetLocalSize(sub_schurs->is_I,&i);
520: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
521: PetscBT touched;
522: const PetscInt* idx_B;
523: PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
525: if (!xadj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
526: /* get sizes */
527: ISGetLocalSize(sub_schurs->is_I,&n_I);
528: ISGetLocalSize(sub_schurs->is_B,&n_B);
530: PetscMalloc1(n_I+n_B,&local_numbering);
531: PetscBTCreate(n_I+n_B,&touched);
532: PetscBTMemzero(n_I+n_B,touched);
534: /* all boundary dofs must be skipped when adding layers */
535: ISGetIndices(sub_schurs->is_B,&idx_B);
536: for (j=0;j<n_B;j++) {
537: PetscBTSet(touched,idx_B[j]);
538: }
539: PetscArraycpy(local_numbering,idx_B,n_B);
540: ISRestoreIndices(sub_schurs->is_B,&idx_B);
542: /* add prescribed number of layers of dofs */
543: n_local_dofs = n_B;
544: n_prev_added = n_B;
545: for (layer=0;layer<nlayers;layer++) {
546: PetscInt n_added;
547: if (n_local_dofs == n_I+n_B) break;
548: if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
549: PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
550: n_prev_added = n_added;
551: n_local_dofs += n_added;
552: if (!n_added) break;
553: }
554: PetscBTDestroy(&touched);
556: /* IS for I layer dofs in original numbering */
557: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
558: PetscFree(local_numbering);
559: ISSort(is_I_layer);
560: /* IS for I layer dofs in I numbering */
561: if (!sub_schurs->schur_explicit) {
562: ISLocalToGlobalMapping ItoNmap;
563: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
564: ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
565: ISLocalToGlobalMappingDestroy(&ItoNmap);
567: /* II block */
568: MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
569: }
570: } else {
571: PetscInt n_I;
573: /* IS for I dofs in original numbering */
574: PetscObjectReference((PetscObject)sub_schurs->is_I);
575: is_I_layer = sub_schurs->is_I;
577: /* IS for I dofs in I numbering (strided 1) */
578: if (!sub_schurs->schur_explicit) {
579: ISGetSize(sub_schurs->is_I,&n_I);
580: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);
582: /* II block is the same */
583: PetscObjectReference((PetscObject)A_II);
584: AE_II = A_II;
585: }
586: }
588: /* Get info on subset sizes and sum of all subsets sizes */
589: max_subset_size = 0;
590: local_size = 0;
591: for (i=0;i<sub_schurs->n_subs;i++) {
592: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
593: max_subset_size = PetscMax(subset_size,max_subset_size);
594: local_size += subset_size;
595: }
597: /* Work arrays for local indices */
598: extra = 0;
599: ISGetLocalSize(sub_schurs->is_B,&n_B);
600: if (sub_schurs->schur_explicit && is_I_layer) {
601: ISGetLocalSize(is_I_layer,&extra);
602: }
603: PetscMalloc1(n_B+extra,&all_local_idx_N);
604: if (extra) {
605: const PetscInt *idxs;
606: ISGetIndices(is_I_layer,&idxs);
607: PetscArraycpy(all_local_idx_N,idxs,extra);
608: ISRestoreIndices(is_I_layer,&idxs);
609: }
610: PetscMalloc1(sub_schurs->n_subs,&auxnum1);
611: PetscMalloc1(sub_schurs->n_subs,&auxnum2);
613: /* Get local indices in local numbering */
614: local_size = 0;
615: local_stash_size = 0;
616: for (i=0;i<sub_schurs->n_subs;i++) {
617: const PetscInt *idxs;
619: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
620: ISGetIndices(sub_schurs->is_subs[i],&idxs);
621: /* start (smallest in global ordering) and multiplicity */
622: auxnum1[i] = idxs[0];
623: auxnum2[i] = subset_size*subset_size;
624: /* subset indices in local numbering */
625: PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size);
626: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
627: local_size += subset_size;
628: local_stash_size += subset_size*subset_size;
629: }
631: /* allocate extra workspace needed only for GETRI or SYTRF */
632: use_potr = use_sytr = PETSC_FALSE;
633: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
634: use_potr = PETSC_TRUE;
635: } else if (sub_schurs->is_symmetric) {
636: use_sytr = PETSC_TRUE;
637: }
638: if (local_size && !use_potr) {
639: PetscScalar lwork,dummyscalar = 0.;
640: PetscBLASInt dummyint = 0;
642: B_lwork = -1;
643: PetscBLASIntCast(local_size,&B_N);
644: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
645: if (use_sytr) {
646: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
647: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
648: } else {
649: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
650: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
651: }
652: PetscFPTrapPop();
653: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
654: PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
655: } else {
656: Bwork = NULL;
657: pivots = NULL;
658: }
660: /* prepare data for summing up properly schurs on subsets */
661: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
662: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
663: ISDestroy(&all_subsets_n);
664: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
665: ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
666: ISDestroy(&all_subsets);
667: ISDestroy(&all_subsets_mult);
668: ISGetLocalSize(all_subsets_n,&i);
669: if (i != local_stash_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_stash_size);
670: VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash);
671: VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash);
672: VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash);
673: ISDestroy(&all_subsets_n);
675: /* subset indices in local boundary numbering */
676: if (!sub_schurs->is_Ej_all) {
677: PetscInt *all_local_idx_B;
679: PetscMalloc1(local_size,&all_local_idx_B);
680: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
681: if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size);
682: ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
683: }
685: if (change) {
686: ISLocalToGlobalMapping BtoS;
687: IS change_primal_B;
688: IS change_primal_all;
690: if (sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
691: if (sub_schurs->change) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
692: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub);
693: for (i=0;i<sub_schurs->n_subs;i++) {
694: ISLocalToGlobalMapping NtoS;
695: ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS);
696: ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]);
697: ISLocalToGlobalMappingDestroy(&NtoS);
698: }
699: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B);
700: ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS);
701: ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all);
702: ISLocalToGlobalMappingDestroy(&BtoS);
703: ISDestroy(&change_primal_B);
704: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change);
705: for (i=0;i<sub_schurs->n_subs;i++) {
706: Mat change_sub;
708: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
709: KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]);
710: KSPSetType(sub_schurs->change[i],KSPPREONLY);
711: if (!sub_schurs->change_with_qr) {
712: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub);
713: } else {
714: Mat change_subt;
715: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt);
716: MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub);
717: MatDestroy(&change_subt);
718: }
719: KSPSetOperators(sub_schurs->change[i],change_sub,change_sub);
720: MatDestroy(&change_sub);
721: KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix);
722: KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_");
723: }
724: ISDestroy(&change_primal_all);
725: }
727: /* Local matrix of all local Schur on subsets (transposed) */
728: if (!sub_schurs->S_Ej_all) {
729: Mat T;
730: PetscScalar *v;
731: PetscInt *ii,*jj;
732: PetscInt cum,i,j,k;
734: /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
735: Allocate properly a representative matrix and duplicate */
736: PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v);
737: ii[0] = 0;
738: cum = 0;
739: for (i=0;i<sub_schurs->n_subs;i++) {
740: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
741: for (j=0;j<subset_size;j++) {
742: const PetscInt row = cum+j;
743: PetscInt col = cum;
745: ii[row+1] = ii[row] + subset_size;
746: for (k=ii[row];k<ii[row+1];k++) {
747: jj[k] = col;
748: col++;
749: }
750: }
751: cum += subset_size;
752: }
753: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T);
754: MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all);
755: MatDestroy(&T);
756: PetscFree3(ii,jj,v);
757: }
758: /* matrices for deluxe scaling and adaptive selection */
759: if (compute_Stilda) {
760: if (!sub_schurs->sum_S_Ej_tilda_all) {
761: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all);
762: }
763: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
764: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all);
765: }
766: }
768: /* Compute Schur complements explicitly */
769: F = NULL;
770: if (!sub_schurs->schur_explicit) {
771: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
772: it is not efficient, unless the economic version of the scaling is used */
773: Mat S_Ej_expl;
774: PetscScalar *work;
775: PetscInt j,*dummy_idx;
776: PetscBool Sdense;
778: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
779: local_size = 0;
780: for (i=0;i<sub_schurs->n_subs;i++) {
781: IS is_subset_B;
782: Mat AE_EE,AE_IE,AE_EI,S_Ej;
784: /* subsets in original and boundary numbering */
785: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
786: /* EE block */
787: MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
788: /* IE block */
789: MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
790: /* EI block */
791: if (sub_schurs->is_symmetric) {
792: MatCreateTranspose(AE_IE,&AE_EI);
793: } else if (sub_schurs->is_hermitian) {
794: MatCreateHermitianTranspose(AE_IE,&AE_EI);
795: } else {
796: MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
797: }
798: ISDestroy(&is_subset_B);
799: MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
800: MatDestroy(&AE_EE);
801: MatDestroy(&AE_IE);
802: MatDestroy(&AE_EI);
803: if (AE_II == A_II) { /* we can reuse the same ksp */
804: KSP ksp;
805: MatSchurComplementGetKSP(sub_schurs->S,&ksp);
806: MatSchurComplementSetKSP(S_Ej,ksp);
807: } else { /* build new ksp object which inherits ksp and pc types from the original one */
808: KSP origksp,schurksp;
809: PC origpc,schurpc;
810: KSPType ksp_type;
811: PetscInt n_internal;
812: PetscBool ispcnone;
814: MatSchurComplementGetKSP(sub_schurs->S,&origksp);
815: MatSchurComplementGetKSP(S_Ej,&schurksp);
816: KSPGetType(origksp,&ksp_type);
817: KSPSetType(schurksp,ksp_type);
818: KSPGetPC(schurksp,&schurpc);
819: KSPGetPC(origksp,&origpc);
820: PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
821: if (!ispcnone) {
822: PCType pc_type;
823: PCGetType(origpc,&pc_type);
824: PCSetType(schurpc,pc_type);
825: } else {
826: PCSetType(schurpc,PCLU);
827: }
828: ISGetSize(is_I,&n_internal);
829: if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
830: MatSolverType solver = NULL;
831: PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver);
832: if (solver) {
833: PCFactorSetMatSolverType(schurpc,solver);
834: }
835: }
836: KSPSetUp(schurksp);
837: }
838: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
839: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
840: PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl);
841: PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
842: if (Sdense) {
843: for (j=0;j<subset_size;j++) {
844: dummy_idx[j]=local_size+j;
845: }
846: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
847: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
848: MatDestroy(&S_Ej);
849: MatDestroy(&S_Ej_expl);
850: local_size += subset_size;
851: }
852: PetscFree2(dummy_idx,work);
853: /* free */
854: ISDestroy(&is_I);
855: MatDestroy(&AE_II);
856: PetscFree(all_local_idx_N);
857: } else {
858: Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
859: Vec Dall = NULL;
860: IS is_A_all,*is_p_r = NULL;
861: MatType Stype;
862: PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
863: PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL;
864: const PetscScalar *rS_data;
865: PetscInt n,n_I,size_schur,size_active_schur,cum,cum2;
866: PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE;
867: PetscBool schur_has_vertices,factor_workaround;
868: PetscBool use_cholesky;
869: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
870: PetscBool oldpin;
871: #endif
873: /* get sizes */
874: n_I = 0;
875: if (is_I_layer) {
876: ISGetLocalSize(is_I_layer,&n_I);
877: }
878: economic = PETSC_FALSE;
879: ISGetLocalSize(sub_schurs->is_I,&cum);
880: if (cum != n_I) economic = PETSC_TRUE;
881: MatGetLocalSize(sub_schurs->A,&n,NULL);
882: size_active_schur = local_size;
884: /* import scaling vector (wrong formulation if we have 3D edges) */
885: if (scaling && compute_Stilda) {
886: const PetscScalar *array;
887: PetscScalar *array2;
888: const PetscInt *idxs;
889: PetscInt i;
891: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
892: VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall);
893: VecGetArrayRead(scaling,&array);
894: VecGetArray(Dall,&array2);
895: for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
896: VecRestoreArray(Dall,&array2);
897: VecRestoreArrayRead(scaling,&array);
898: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
899: deluxe = PETSC_FALSE;
900: }
902: /* size active schurs does not count any dirichlet or vertex dof on the interface */
903: factor_workaround = PETSC_FALSE;
904: schur_has_vertices = PETSC_FALSE;
905: cum = n_I+size_active_schur;
906: if (sub_schurs->is_dir) {
907: const PetscInt* idxs;
908: PetscInt n_dir;
910: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
911: ISGetIndices(sub_schurs->is_dir,&idxs);
912: PetscArraycpy(all_local_idx_N+cum,idxs,n_dir);
913: ISRestoreIndices(sub_schurs->is_dir,&idxs);
914: cum += n_dir;
915: factor_workaround = PETSC_TRUE;
916: }
917: /* include the primal vertices in the Schur complement */
918: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
919: PetscInt n_v;
921: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
922: if (n_v) {
923: const PetscInt* idxs;
925: ISGetIndices(sub_schurs->is_vertices,&idxs);
926: PetscArraycpy(all_local_idx_N+cum,idxs,n_v);
927: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
928: cum += n_v;
929: factor_workaround = PETSC_TRUE;
930: schur_has_vertices = PETSC_TRUE;
931: }
932: }
933: size_schur = cum - n_I;
934: ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all);
935: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
936: oldpin = sub_schurs->A->boundtocpu;
937: MatBindToCPU(sub_schurs->A,PETSC_TRUE);
938: #endif
939: if (cum == n) {
940: ISSetPermutation(is_A_all);
941: MatPermute(sub_schurs->A,is_A_all,is_A_all,&A);
942: } else {
943: MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
944: }
945: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
946: MatBindToCPU(sub_schurs->A,oldpin);
947: #endif
948: MatSetOptionsPrefix(A,sub_schurs->prefix);
949: MatAppendOptionsPrefix(A,"sub_schurs_");
951: /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
952: this is a workaround */
953: if (benign_n) {
954: Vec v,benign_AIIm1_ones;
955: ISLocalToGlobalMapping N_to_reor;
956: IS is_p0,is_p0_p;
957: PetscScalar *cs_AIB,*AIIm1_data;
958: PetscInt sizeA;
960: ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor);
961: ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
962: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p);
963: ISDestroy(&is_p0);
964: MatCreateVecs(A,&v,&benign_AIIm1_ones);
965: VecGetSize(v,&sizeA);
966: MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat);
967: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat);
968: MatDenseGetArray(cs_AIB_mat,&cs_AIB);
969: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
970: PetscMalloc1(benign_n,&is_p_r);
971: /* compute colsum of A_IB restricted to pressures */
972: for (i=0;i<benign_n;i++) {
973: const PetscScalar *array;
974: const PetscInt *idxs;
975: PetscInt j,nz;
977: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]);
978: ISGetLocalSize(is_p_r[i],&nz);
979: ISGetIndices(is_p_r[i],&idxs);
980: for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
981: ISRestoreIndices(is_p_r[i],&idxs);
982: VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
983: MatMult(A,benign_AIIm1_ones,v);
984: VecResetArray(benign_AIIm1_ones);
985: VecGetArrayRead(v,&array);
986: for (j=0;j<size_schur;j++) {
987: #if defined(PETSC_USE_COMPLEX)
988: cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
989: #else
990: cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
991: #endif
992: }
993: VecRestoreArrayRead(v,&array);
994: }
995: MatDenseRestoreArray(cs_AIB_mat,&cs_AIB);
996: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
997: VecDestroy(&v);
998: VecDestroy(&benign_AIIm1_ones);
999: MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
1000: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1001: MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1002: MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL);
1003: ISDestroy(&is_p0_p);
1004: ISLocalToGlobalMappingDestroy(&N_to_reor);
1005: }
1006: MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric);
1007: MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
1008: MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
1010: /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
1011: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
1013: /* when using the benign subspace trick, the local Schur complements are SPD */
1014: /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1015: Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1016: if (benign_trick) {
1017: sub_schurs->is_posdef = PETSC_TRUE;
1018: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg);
1019: if (flg) use_cholesky = PETSC_FALSE;
1020: }
1022: if (n_I) {
1023: IS is_schur;
1024: char stype[64];
1025: PetscBool gpu = PETSC_FALSE;
1027: if (use_cholesky) {
1028: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F);
1029: } else {
1030: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F);
1031: }
1032: MatSetErrorIfFailure(A,PETSC_TRUE);
1033: #if defined(PETSC_HAVE_MKL_PARDISO)
1034: if (benign_trick) { MatMkl_PardisoSetCntl(F,10,10); }
1035: #endif
1036: /* subsets ordered last */
1037: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
1038: MatFactorSetSchurIS(F,is_schur);
1039: ISDestroy(&is_schur);
1041: /* factorization step */
1042: if (use_cholesky) {
1043: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
1044: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1045: MatMumpsSetIcntl(F,19,2);
1046: #endif
1047: MatCholeskyFactorNumeric(F,A,NULL);
1048: S_lower_triangular = PETSC_TRUE;
1049: } else {
1050: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
1051: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1052: MatMumpsSetIcntl(F,19,3);
1053: #endif
1054: MatLUFactorNumeric(F,A,NULL);
1055: }
1056: MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view");
1058: if (matl_dbg_viewer) {
1059: Mat S;
1060: IS is;
1062: PetscObjectSetName((PetscObject)A,"A");
1063: MatView(A,matl_dbg_viewer);
1064: MatFactorCreateSchurComplement(F,&S,NULL);
1065: PetscObjectSetName((PetscObject)S,"S");
1066: MatView(S,matl_dbg_viewer);
1067: MatDestroy(&S);
1068: ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is);
1069: PetscObjectSetName((PetscObject)is,"I");
1070: ISView(is,matl_dbg_viewer);
1071: ISDestroy(&is);
1072: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is);
1073: PetscObjectSetName((PetscObject)is,"B");
1074: ISView(is,matl_dbg_viewer);
1075: ISDestroy(&is);
1076: PetscObjectSetName((PetscObject)is_A_all,"IA");
1077: ISView(is_A_all,matl_dbg_viewer);
1078: }
1080: /* get explicit Schur Complement computed during numeric factorization */
1081: MatFactorGetSchurComplement(F,&S_all,NULL);
1082: PetscStrncpy(stype,MATSEQDENSE,sizeof(stype));
1083: #if defined(PETSC_HAVE_CUDA)
1084: PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"");
1085: #endif
1086: if (gpu) {
1087: PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype));
1088: }
1089: PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL);
1090: MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all);
1091: MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef);
1092: MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian);
1093: MatGetType(S_all,&Stype);
1095: /* we can reuse the solvers if we are not using the economic version */
1096: reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1097: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1098: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1099: reuse_solvers = factor_workaround = PETSC_FALSE;
1101: solver_S = PETSC_TRUE;
1103: /* update the Schur complement with the change of basis on the pressures */
1104: if (benign_n) {
1105: const PetscScalar *cs_AIB;
1106: PetscScalar *S_data,*AIIm1_data;
1107: Mat S2 = NULL,S3 = NULL; /* dbg */
1108: PetscScalar *S2_data,*S3_data; /* dbg */
1109: Vec v,benign_AIIm1_ones;
1110: PetscInt sizeA;
1112: MatDenseGetArray(S_all,&S_data);
1113: MatCreateVecs(A,&v,&benign_AIIm1_ones);
1114: VecGetSize(v,&sizeA);
1115: #if defined(PETSC_HAVE_MUMPS)
1116: MatMumpsSetIcntl(F,26,0);
1117: #endif
1118: #if defined(PETSC_HAVE_MKL_PARDISO)
1119: MatMkl_PardisoSetCntl(F,70,1);
1120: #endif
1121: MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB);
1122: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
1123: if (matl_dbg_viewer) {
1124: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2);
1125: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3);
1126: MatDenseGetArray(S2,&S2_data);
1127: MatDenseGetArray(S3,&S3_data);
1128: }
1129: for (i=0;i<benign_n;i++) {
1130: PetscScalar *array,sum = 0.,one = 1.,*sums;
1131: const PetscInt *idxs;
1132: PetscInt k,j,nz;
1133: PetscBLASInt B_k,B_n;
1135: PetscCalloc1(benign_n,&sums);
1136: VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
1137: VecCopy(benign_AIIm1_ones,v);
1138: MatSolve(F,v,benign_AIIm1_ones);
1139: MatMult(A,benign_AIIm1_ones,v);
1140: VecResetArray(benign_AIIm1_ones);
1141: /* p0 dofs (eliminated) are excluded from the sums */
1142: for (k=0;k<benign_n;k++) {
1143: ISGetLocalSize(is_p_r[k],&nz);
1144: ISGetIndices(is_p_r[k],&idxs);
1145: for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1146: ISRestoreIndices(is_p_r[k],&idxs);
1147: }
1148: VecGetArrayRead(v,(const PetscScalar**)&array);
1149: if (matl_dbg_viewer) {
1150: Vec vv;
1151: char name[16];
1153: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv);
1154: PetscSNPrintf(name,sizeof(name),"Pvs%D",i);
1155: PetscObjectSetName((PetscObject)vv,name);
1156: VecView(vv,matl_dbg_viewer);
1157: }
1158: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1159: /* cs_AIB already scaled by 1./nz */
1160: B_k = 1;
1161: for (k=0;k<benign_n;k++) {
1162: sum = sums[k];
1163: PetscBLASIntCast(size_schur,&B_n);
1165: if (PetscAbsScalar(sum) == 0.0) continue;
1166: if (k == i) {
1167: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1168: if (matl_dbg_viewer) {
1169: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1170: }
1171: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1172: sum /= 2.0;
1173: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1174: if (matl_dbg_viewer) {
1175: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1176: }
1177: }
1178: }
1179: sum = 1.;
1180: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1181: if (matl_dbg_viewer) {
1182: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1183: }
1184: VecRestoreArrayRead(v,(const PetscScalar**)&array);
1185: /* set p0 entry of AIIm1_ones to zero */
1186: ISGetLocalSize(is_p_r[i],&nz);
1187: ISGetIndices(is_p_r[i],&idxs);
1188: for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1189: ISRestoreIndices(is_p_r[i],&idxs);
1190: PetscFree(sums);
1191: }
1192: VecDestroy(&benign_AIIm1_ones);
1193: if (matl_dbg_viewer) {
1194: MatDenseRestoreArray(S2,&S2_data);
1195: MatDenseRestoreArray(S3,&S3_data);
1196: }
1197: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1198: PetscInt k,j;
1199: for (k=0;k<size_schur;k++) {
1200: for (j=k;j<size_schur;j++) {
1201: S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1202: }
1203: }
1204: }
1206: /* restore defaults */
1207: #if defined(PETSC_HAVE_MUMPS)
1208: MatMumpsSetIcntl(F,26,-1);
1209: #endif
1210: #if defined(PETSC_HAVE_MKL_PARDISO)
1211: MatMkl_PardisoSetCntl(F,70,0);
1212: #endif
1213: MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB);
1214: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
1215: VecDestroy(&v);
1216: MatDenseRestoreArray(S_all,&S_data);
1217: if (matl_dbg_viewer) {
1218: Mat S;
1220: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1221: MatFactorCreateSchurComplement(F,&S,NULL);
1222: PetscObjectSetName((PetscObject)S,"Sb");
1223: MatView(S,matl_dbg_viewer);
1224: MatDestroy(&S);
1225: PetscObjectSetName((PetscObject)S2,"S2P");
1226: MatView(S2,matl_dbg_viewer);
1227: PetscObjectSetName((PetscObject)S3,"S3P");
1228: MatView(S3,matl_dbg_viewer);
1229: PetscObjectSetName((PetscObject)cs_AIB_mat,"cs");
1230: MatView(cs_AIB_mat,matl_dbg_viewer);
1231: MatFactorGetSchurComplement(F,&S_all,NULL);
1232: }
1233: MatDestroy(&S2);
1234: MatDestroy(&S3);
1235: }
1236: if (!reuse_solvers) {
1237: for (i=0;i<benign_n;i++) {
1238: ISDestroy(&is_p_r[i]);
1239: }
1240: PetscFree(is_p_r);
1241: MatDestroy(&cs_AIB_mat);
1242: MatDestroy(&benign_AIIm1_ones_mat);
1243: }
1244: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1245: MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
1246: MatGetType(S_all,&Stype);
1247: reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1248: factor_workaround = PETSC_FALSE;
1249: solver_S = PETSC_FALSE;
1250: }
1252: if (reuse_solvers) {
1253: Mat A_II,Afake;
1254: Vec vec1_B;
1255: PCBDDCReuseSolvers msolv_ctx;
1256: PetscInt n_R;
1258: if (sub_schurs->reuse_solver) {
1259: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1260: } else {
1261: PetscNew(&sub_schurs->reuse_solver);
1262: }
1263: msolv_ctx = sub_schurs->reuse_solver;
1264: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
1265: PetscObjectReference((PetscObject)F);
1266: msolv_ctx->F = F;
1267: MatCreateVecs(F,&msolv_ctx->sol,NULL);
1268: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1269: {
1270: PetscScalar *array;
1271: PetscInt n;
1273: VecGetLocalSize(msolv_ctx->sol,&n);
1274: VecGetArray(msolv_ctx->sol,&array);
1275: VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs);
1276: VecRestoreArray(msolv_ctx->sol,&array);
1277: }
1278: msolv_ctx->has_vertices = schur_has_vertices;
1280: /* interior solver */
1281: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver);
1282: PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
1283: PCSetType(msolv_ctx->interior_solver,PCSHELL);
1284: PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)");
1285: PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
1286: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1287: PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior);
1288: PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose);
1290: /* correction solver */
1291: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver);
1292: PCSetType(msolv_ctx->correction_solver,PCSHELL);
1293: PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)");
1294: PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
1295: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1296: PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction);
1297: PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose);
1299: /* scatter and vecs for Schur complement solver */
1300: MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
1301: MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
1302: if (!schur_has_vertices) {
1303: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
1304: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
1305: PetscObjectReference((PetscObject)is_A_all);
1306: msolv_ctx->is_R = is_A_all;
1307: } else {
1308: IS is_B_all;
1309: const PetscInt* idxs;
1310: PetscInt dual,n_v,n;
1312: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
1313: dual = size_schur - n_v;
1314: ISGetLocalSize(is_A_all,&n);
1315: ISGetIndices(is_A_all,&idxs);
1316: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all);
1317: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B);
1318: ISDestroy(&is_B_all);
1319: ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all);
1320: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B);
1321: ISDestroy(&is_B_all);
1322: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R);
1323: ISRestoreIndices(is_A_all,&idxs);
1324: }
1325: ISGetLocalSize(msolv_ctx->is_R,&n_R);
1326: MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake);
1327: MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY);
1328: MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY);
1329: PCSetOperators(msolv_ctx->correction_solver,Afake,Afake);
1330: MatDestroy(&Afake);
1331: VecDestroy(&vec1_B);
1333: /* communicate benign info to solver context */
1334: if (benign_n) {
1335: PetscScalar *array;
1337: msolv_ctx->benign_n = benign_n;
1338: msolv_ctx->benign_zerodiag_subs = is_p_r;
1339: PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals);
1340: msolv_ctx->benign_csAIB = cs_AIB_mat;
1341: MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL);
1342: VecGetArray(msolv_ctx->benign_corr_work,&array);
1343: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec);
1344: VecRestoreArray(msolv_ctx->benign_corr_work,&array);
1345: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1346: }
1347: } else {
1348: if (sub_schurs->reuse_solver) {
1349: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1350: }
1351: PetscFree(sub_schurs->reuse_solver);
1352: }
1353: MatDestroy(&A);
1354: ISDestroy(&is_A_all);
1356: /* Work arrays */
1357: PetscMalloc1(max_subset_size*max_subset_size,&work);
1359: /* S_Ej_all */
1360: cum = cum2 = 0;
1361: MatDenseGetArrayRead(S_all,&rS_data);
1362: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr);
1363: if (compute_Stilda) {
1364: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1365: }
1366: for (i=0;i<sub_schurs->n_subs;i++) {
1367: PetscInt j;
1369: /* get S_E */
1370: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1371: if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1372: PetscInt k;
1373: for (k=0;k<subset_size;k++) {
1374: for (j=k;j<subset_size;j++) {
1375: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1376: work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1377: }
1378: }
1379: } else { /* just copy to workspace */
1380: PetscInt k;
1381: for (k=0;k<subset_size;k++) {
1382: for (j=0;j<subset_size;j++) {
1383: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1384: }
1385: }
1386: }
1387: /* insert S_E values */
1388: if (sub_schurs->change) {
1389: Mat change_sub,SEj,T;
1391: /* change basis */
1392: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1393: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1394: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1395: Mat T2;
1396: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1397: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1398: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1399: MatDestroy(&T2);
1400: } else {
1401: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1402: }
1403: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1404: MatDestroy(&T);
1405: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL);
1406: MatDestroy(&SEj);
1407: }
1408: if (deluxe) {
1409: PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1410: /* if adaptivity is requested, invert S_E blocks */
1411: if (compute_Stilda) {
1412: Mat M;
1413: const PetscScalar *vals;
1414: PetscBool isdense,isdensecuda;
1416: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M);
1417: MatSetOption(M,MAT_SPD,sub_schurs->is_posdef);
1418: MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian);
1419: if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1420: MatSetType(M,Stype);
1421: }
1422: PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense);
1423: PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda);
1424: if (use_cholesky) {
1425: MatCholeskyFactor(M,NULL,NULL);
1426: } else {
1427: MatLUFactor(M,NULL,NULL,NULL);
1428: }
1429: if (isdense) {
1430: MatSeqDenseInvertFactors_Private(M);
1431: #if defined(PETSC_HAVE_CUDA)
1432: } else if (isdensecuda) {
1433: MatSeqDenseCUDAInvertFactors_Private(M);
1434: #endif
1435: } else SETERRQ1(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1436: MatDenseGetArrayRead(M,&vals);
1437: PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size);
1438: MatDenseRestoreArrayRead(M,&vals);
1439: MatDestroy(&M);
1440: }
1441: } else if (compute_Stilda) { /* not using deluxe */
1442: Mat SEj;
1443: Vec D;
1444: PetscScalar *array;
1446: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1447: VecGetArray(Dall,&array);
1448: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D);
1449: VecRestoreArray(Dall,&array);
1450: VecShift(D,-1.);
1451: MatDiagonalScale(SEj,D,D);
1452: MatDestroy(&SEj);
1453: VecDestroy(&D);
1454: PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1455: }
1456: cum += subset_size;
1457: cum2 += subset_size*(size_schur + 1);
1458: SEj_arr += subset_size*subset_size;
1459: if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1460: }
1461: MatDenseRestoreArrayRead(S_all,&rS_data);
1462: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr);
1463: if (compute_Stilda) {
1464: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1465: }
1466: if (solver_S) {
1467: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1468: }
1470: /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1471: however, preliminary tests indicate using GPUs is still faster in the solve phase */
1472: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1473: if (reuse_solvers) {
1474: Mat St;
1475: MatFactorSchurStatus st;
1477: flg = PETSC_FALSE;
1478: PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL);
1479: MatFactorGetSchurComplement(F,&St,&st);
1480: MatBindToCPU(St,flg);
1481: MatFactorRestoreSchurComplement(F,&St,st);
1482: }
1483: #endif
1485: schur_factor = NULL;
1486: if (compute_Stilda && size_active_schur) {
1488: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);
1489: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1490: PetscArraycpy(SEjinv_arr,work,size_schur*size_schur);
1491: } else {
1492: Mat S_all_inv=NULL;
1494: if (solver_S) {
1495: /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1496: The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1497: if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1498: PetscScalar *data;
1499: PetscInt nd = 0;
1501: if (!use_potr) {
1502: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1503: }
1504: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1505: MatDenseGetArray(S_all_inv,&data);
1506: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1507: ISGetLocalSize(sub_schurs->is_dir,&nd);
1508: }
1510: /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1511: if (schur_has_vertices) {
1512: Mat M;
1513: PetscScalar *tdata;
1514: PetscInt nv = 0, news;
1516: ISGetLocalSize(sub_schurs->is_vertices,&nv);
1517: news = size_active_schur + nv;
1518: PetscCalloc1(news*news,&tdata);
1519: for (i=0;i<size_active_schur;i++) {
1520: PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i);
1521: PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv);
1522: }
1523: for (i=0;i<nv;i++) {
1524: PetscInt k = i+size_active_schur;
1525: PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i);
1526: }
1528: MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M);
1529: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1530: MatCholeskyFactor(M,NULL,NULL);
1531: /* save the factors */
1532: cum = 0;
1533: PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor);
1534: for (i=0;i<size_active_schur;i++) {
1535: PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i);
1536: cum += size_active_schur - i;
1537: }
1538: for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1539: MatSeqDenseInvertFactors_Private(M);
1540: /* move back just the active dofs to the Schur complement */
1541: for (i=0;i<size_active_schur;i++) {
1542: PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur);
1543: }
1544: PetscFree(tdata);
1545: MatDestroy(&M);
1546: } else { /* we can factorize and invert just the activedofs */
1547: Mat M;
1548: PetscScalar *aux;
1550: PetscMalloc1(nd,&aux);
1551: for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1552: MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M);
1553: MatDenseSetLDA(M,size_schur);
1554: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1555: MatCholeskyFactor(M,NULL,NULL);
1556: MatSeqDenseInvertFactors_Private(M);
1557: MatDestroy(&M);
1558: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M);
1559: MatZeroEntries(M);
1560: MatDestroy(&M);
1561: MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M);
1562: MatDenseSetLDA(M,size_schur);
1563: MatZeroEntries(M);
1564: MatDestroy(&M);
1565: for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1566: PetscFree(aux);
1567: }
1568: MatDenseRestoreArray(S_all_inv,&data);
1569: } else { /* use MatFactor calls to invert S */
1570: MatFactorInvertSchurComplement(F);
1571: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1572: }
1573: } else { /* we need to invert explicitly since we are not using MatFactor for S */
1574: PetscObjectReference((PetscObject)S_all);
1575: S_all_inv = S_all;
1576: MatDenseGetArray(S_all_inv,&S_data);
1577: PetscBLASIntCast(size_schur,&B_N);
1578: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1579: if (use_potr) {
1580: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1581: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1582: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1583: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1584: } else if (use_sytr) {
1585: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1586: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1587: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1588: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1589: } else {
1590: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1591: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1592: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1593: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1594: }
1595: PetscLogFlops(1.0*size_schur*size_schur*size_schur);
1596: PetscFPTrapPop();
1597: MatDenseRestoreArray(S_all_inv,&S_data);
1598: }
1599: /* S_Ej_tilda_all */
1600: cum = cum2 = 0;
1601: MatDenseGetArrayRead(S_all_inv,&rS_data);
1602: for (i=0;i<sub_schurs->n_subs;i++) {
1603: PetscInt j;
1605: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1606: /* get (St^-1)_E */
1607: /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1608: will be properly accessed later during adaptive selection */
1609: if (S_lower_triangular) {
1610: PetscInt k;
1611: if (sub_schurs->change) {
1612: for (k=0;k<subset_size;k++) {
1613: for (j=k;j<subset_size;j++) {
1614: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1615: work[j*subset_size+k] = work[k*subset_size+j];
1616: }
1617: }
1618: } else {
1619: for (k=0;k<subset_size;k++) {
1620: for (j=k;j<subset_size;j++) {
1621: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1622: }
1623: }
1624: }
1625: } else {
1626: PetscInt k;
1627: for (k=0;k<subset_size;k++) {
1628: for (j=0;j<subset_size;j++) {
1629: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1630: }
1631: }
1632: }
1633: if (sub_schurs->change) {
1634: Mat change_sub,SEj,T;
1636: /* change basis */
1637: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1638: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1639: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1640: Mat T2;
1641: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1642: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1643: MatDestroy(&T2);
1644: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1645: } else {
1646: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1647: }
1648: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1649: MatDestroy(&T);
1650: /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1651: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL);
1652: MatDestroy(&SEj);
1653: }
1654: PetscArraycpy(SEjinv_arr,work,subset_size*subset_size);
1655: cum += subset_size;
1656: cum2 += subset_size*(size_schur + 1);
1657: SEjinv_arr += subset_size*subset_size;
1658: }
1659: MatDenseRestoreArrayRead(S_all_inv,&rS_data);
1660: if (solver_S) {
1661: if (schur_has_vertices) {
1662: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED);
1663: } else {
1664: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED);
1665: }
1666: }
1667: MatDestroy(&S_all_inv);
1668: }
1669: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);
1671: /* move back factors if needed */
1672: if (schur_has_vertices) {
1673: Mat S_tmp;
1674: PetscInt nd = 0;
1676: if (!solver_S) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1677: MatFactorGetSchurComplement(F,&S_tmp,NULL);
1678: if (use_potr) {
1679: PetscScalar *data;
1681: MatDenseGetArray(S_tmp,&data);
1682: PetscArrayzero(data,size_schur*size_schur);
1684: if (S_lower_triangular) {
1685: cum = 0;
1686: for (i=0;i<size_active_schur;i++) {
1687: PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i);
1688: cum += size_active_schur-i;
1689: }
1690: } else {
1691: PetscArraycpy(data,schur_factor,size_schur*size_schur);
1692: }
1693: if (sub_schurs->is_dir) {
1694: ISGetLocalSize(sub_schurs->is_dir,&nd);
1695: for (i=0;i<nd;i++) {
1696: data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1697: }
1698: }
1699: /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1700: set the diagonal entry of the Schur factor to a very large value */
1701: for (i=size_active_schur+nd;i<size_schur;i++) {
1702: data[i*(size_schur+1)] = infty;
1703: }
1704: MatDenseRestoreArray(S_tmp,&data);
1705: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1706: MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED);
1707: }
1708: } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1709: PetscScalar *data;
1710: PetscInt nd = 0;
1712: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1713: ISGetLocalSize(sub_schurs->is_dir,&nd);
1714: }
1715: MatFactorGetSchurComplement(F,&S_all,NULL);
1716: MatDenseGetArray(S_all,&data);
1717: for (i=0;i<size_active_schur;i++) {
1718: PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1719: }
1720: for (i=size_active_schur+nd;i<size_schur;i++) {
1721: PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1722: data[i*(size_schur+1)] = infty;
1723: }
1724: MatDenseRestoreArray(S_all,&data);
1725: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1726: }
1727: PetscFree(work);
1728: PetscFree(schur_factor);
1729: VecDestroy(&Dall);
1730: }
1731: ISDestroy(&is_I_layer);
1732: MatDestroy(&S_all);
1733: MatDestroy(&A_BB);
1734: MatDestroy(&A_IB);
1735: MatDestroy(&A_BI);
1736: MatDestroy(&F);
1738: PetscMalloc1(sub_schurs->n_subs,&nnz);
1739: for (i=0;i<sub_schurs->n_subs;i++) {
1740: ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]);
1741: }
1742: ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer);
1743: MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz);
1744: MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1745: MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1746: if (compute_Stilda) {
1747: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz);
1748: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1749: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1750: if (deluxe) {
1751: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz);
1752: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1753: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1754: }
1755: }
1756: ISDestroy(&is_I_layer);
1758: /* Get local part of (\sum_j S_Ej) */
1759: if (!sub_schurs->sum_S_Ej_all) {
1760: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all);
1761: }
1762: VecSet(gstash,0.0);
1763: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray);
1764: VecPlaceArray(lstash,stasharray);
1765: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1766: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1767: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray);
1768: VecResetArray(lstash);
1769: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray);
1770: VecPlaceArray(lstash,stasharray);
1771: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1772: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1773: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray);
1774: VecResetArray(lstash);
1776: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1777: if (compute_Stilda) {
1778: VecSet(gstash,0.0);
1779: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1780: VecPlaceArray(lstash,stasharray);
1781: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1782: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1783: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1784: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1785: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1786: VecResetArray(lstash);
1787: if (deluxe) {
1788: VecSet(gstash,0.0);
1789: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1790: VecPlaceArray(lstash,stasharray);
1791: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1792: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1793: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1794: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1795: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1796: VecResetArray(lstash);
1797: } else {
1798: PetscScalar *array;
1799: PetscInt cum;
1801: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1802: cum = 0;
1803: for (i=0;i<sub_schurs->n_subs;i++) {
1804: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1805: PetscBLASIntCast(subset_size,&B_N);
1806: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1807: if (use_potr) {
1808: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1809: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1810: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1811: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1812: } else if (use_sytr) {
1813: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1814: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1815: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1816: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1817: } else {
1818: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1819: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1820: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1821: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1822: }
1823: PetscLogFlops(1.0*subset_size*subset_size*subset_size);
1824: PetscFPTrapPop();
1825: cum += subset_size*subset_size;
1826: }
1827: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1828: PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all);
1829: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1830: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1831: }
1832: }
1833: VecDestroy(&lstash);
1834: VecDestroy(&gstash);
1835: VecScatterDestroy(&sstash);
1837: if (matl_dbg_viewer) {
1838: PetscInt cum;
1840: if (sub_schurs->S_Ej_all) {
1841: PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE");
1842: MatView(sub_schurs->S_Ej_all,matl_dbg_viewer);
1843: }
1844: if (sub_schurs->sum_S_Ej_all) {
1845: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE");
1846: MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer);
1847: }
1848: if (sub_schurs->sum_S_Ej_inv_all) {
1849: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm");
1850: MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer);
1851: }
1852: if (sub_schurs->sum_S_Ej_tilda_all) {
1853: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt");
1854: MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer);
1855: }
1856: for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1857: IS is;
1858: char name[16];
1860: PetscSNPrintf(name,sizeof(name),"IE%D",i);
1861: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1862: ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is);
1863: PetscObjectSetName((PetscObject)is,name);
1864: ISView(is,matl_dbg_viewer);
1865: ISDestroy(&is);
1866: cum += subset_size;
1867: }
1868: }
1870: /* free workspace */
1871: PetscViewerDestroy(&matl_dbg_viewer);
1872: PetscFree2(Bwork,pivots);
1873: PetscCommDestroy(&comm_n);
1874: return(0);
1875: }
1877: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1878: {
1879: IS *faces,*edges,*all_cc,vertices;
1880: PetscInt i,n_faces,n_edges,n_all_cc;
1881: PetscBool is_sorted,ispardiso,ismumps;
1882: PetscErrorCode ierr;
1885: ISSorted(is_I,&is_sorted);
1886: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1887: ISSorted(is_B,&is_sorted);
1888: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1890: /* reset any previous data */
1891: PCBDDCSubSchursReset(sub_schurs);
1893: /* get index sets for faces and edges (already sorted by global ordering) */
1894: PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1895: n_all_cc = n_faces+n_edges;
1896: PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1897: PetscMalloc1(n_all_cc,&all_cc);
1898: for (i=0;i<n_faces;i++) {
1899: if (copycc) {
1900: ISDuplicate(faces[i],&all_cc[i]);
1901: } else {
1902: PetscObjectReference((PetscObject)faces[i]);
1903: all_cc[i] = faces[i];
1904: }
1905: }
1906: for (i=0;i<n_edges;i++) {
1907: if (copycc) {
1908: ISDuplicate(edges[i],&all_cc[n_faces+i]);
1909: } else {
1910: PetscObjectReference((PetscObject)edges[i]);
1911: all_cc[n_faces+i] = edges[i];
1912: }
1913: PetscBTSet(sub_schurs->is_edge,n_faces+i);
1914: }
1915: PetscObjectReference((PetscObject)vertices);
1916: sub_schurs->is_vertices = vertices;
1917: PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1918: sub_schurs->is_dir = NULL;
1919: PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);
1921: /* Determine if MatFactor can be used */
1922: PetscStrallocpy(prefix,&sub_schurs->prefix);
1923: #if defined(PETSC_HAVE_MUMPS)
1924: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type));
1925: #elif defined(PETSC_HAVE_MKL_PARDISO)
1926: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type));
1927: #else
1928: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type));
1929: #endif
1930: #if defined(PETSC_USE_COMPLEX)
1931: sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1932: #else
1933: sub_schurs->is_hermitian = PETSC_TRUE;
1934: #endif
1935: sub_schurs->is_posdef = PETSC_TRUE;
1936: sub_schurs->is_symmetric = PETSC_TRUE;
1937: sub_schurs->debug = PETSC_FALSE;
1938: sub_schurs->restrict_comm = PETSC_FALSE;
1939: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
1940: PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL);
1941: PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL);
1942: PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL);
1943: PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL);
1944: PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL);
1945: PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL);
1946: PetscOptionsEnd();
1947: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps);
1948: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso);
1949: sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1951: /* for reals, symmetric and hermitian are synonims */
1952: #if !defined(PETSC_USE_COMPLEX)
1953: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1954: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1955: #endif
1957: PetscObjectReference((PetscObject)is_I);
1958: sub_schurs->is_I = is_I;
1959: PetscObjectReference((PetscObject)is_B);
1960: sub_schurs->is_B = is_B;
1961: PetscObjectReference((PetscObject)graph->l2gmap);
1962: sub_schurs->l2gmap = graph->l2gmap;
1963: PetscObjectReference((PetscObject)BtoNmap);
1964: sub_schurs->BtoNmap = BtoNmap;
1965: sub_schurs->n_subs = n_all_cc;
1966: sub_schurs->is_subs = all_cc;
1967: sub_schurs->S_Ej_all = NULL;
1968: sub_schurs->sum_S_Ej_all = NULL;
1969: sub_schurs->sum_S_Ej_inv_all = NULL;
1970: sub_schurs->sum_S_Ej_tilda_all = NULL;
1971: sub_schurs->is_Ej_all = NULL;
1972: return(0);
1973: }
1975: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1976: {
1977: PCBDDCSubSchurs schurs_ctx;
1978: PetscErrorCode ierr;
1981: PetscNew(&schurs_ctx);
1982: schurs_ctx->n_subs = 0;
1983: *sub_schurs = schurs_ctx;
1984: return(0);
1985: }
1987: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1988: {
1989: PetscInt i;
1993: if (!sub_schurs) return(0);
1994: PetscFree(sub_schurs->prefix);
1995: MatDestroy(&sub_schurs->A);
1996: MatDestroy(&sub_schurs->S);
1997: ISDestroy(&sub_schurs->is_I);
1998: ISDestroy(&sub_schurs->is_B);
1999: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
2000: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
2001: MatDestroy(&sub_schurs->S_Ej_all);
2002: MatDestroy(&sub_schurs->sum_S_Ej_all);
2003: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
2004: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
2005: ISDestroy(&sub_schurs->is_Ej_all);
2006: ISDestroy(&sub_schurs->is_vertices);
2007: ISDestroy(&sub_schurs->is_dir);
2008: PetscBTDestroy(&sub_schurs->is_edge);
2009: for (i=0;i<sub_schurs->n_subs;i++) {
2010: ISDestroy(&sub_schurs->is_subs[i]);
2011: }
2012: if (sub_schurs->n_subs) {
2013: PetscFree(sub_schurs->is_subs);
2014: }
2015: if (sub_schurs->reuse_solver) {
2016: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
2017: }
2018: PetscFree(sub_schurs->reuse_solver);
2019: if (sub_schurs->change) {
2020: for (i=0;i<sub_schurs->n_subs;i++) {
2021: KSPDestroy(&sub_schurs->change[i]);
2022: ISDestroy(&sub_schurs->change_primal_sub[i]);
2023: }
2024: }
2025: PetscFree(sub_schurs->change);
2026: PetscFree(sub_schurs->change_primal_sub);
2027: sub_schurs->n_subs = 0;
2028: return(0);
2029: }
2031: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2032: {
2036: PCBDDCSubSchursReset(*sub_schurs);
2037: PetscFree(*sub_schurs);
2038: return(0);
2039: }
2041: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2042: {
2043: PetscInt i,j,n;
2047: n = 0;
2048: for (i=-n_prev;i<0;i++) {
2049: PetscInt start_dof = queue_tip[i];
2050: for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2051: PetscInt dof = adjncy[j];
2052: if (!PetscBTLookup(touched,dof)) {
2053: PetscBTSet(touched,dof);
2054: queue_tip[n] = dof;
2055: n++;
2056: }
2057: }
2058: }
2059: *n_added = n;
2060: return(0);
2061: }