Actual source code: axpy.c
petsc-3.15.0 2021-03-30
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
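/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   MatAXPY() dispatches to MatTransposeAXPY_Private() above when X or Y is a
   MATTRANSPOSEMAT wrapper produced by MatCreateTranspose(). The helper name
   ExampleTransposeAXPY and the assumption that A and Y are assembled, with Y
   sized like A's transpose, are mine. Error handling follows the usual
   ierr/CHKERRQ convention that is elided in the listing above.
*/
static PetscErrorCode ExampleTransposeAXPY(Mat Y,Mat A,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat            Xt;

  ierr = MatCreateTranspose(A,&Xt);CHKERRQ(ierr);                  /* Xt applies A^T without forming it */
  ierr = MatAXPY(Y,a,Xt,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  /* internally transposes A explicitly, then adds */
  ierr = MatDestroy(&Xt);CHKERRQ(ierr);
  return 0;
}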
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the matrix that is scaled and added
45: . Y - the matrix that is updated in place
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47: or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of Y's)
49: Notes: No operation is performed when a is zero.
51: Level: intermediate
53: .seealso: MatAYPX()
54: @*/
55: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
56: {
58: PetscInt M1,M2,N1,N2;
59: PetscInt m1,m2,n1,n2;
60: MatType t1,t2;
61: PetscBool sametype,transpose;
68: MatGetSize(X,&M1,&N1);
69: MatGetSize(Y,&M2,&N2);
70: MatGetLocalSize(X,&m1,&n1);
71: MatGetLocalSize(Y,&m2,&n2);
72: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
73: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);
74: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
75: if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
76: if (a == (PetscScalar)0.0) return(0);
77: if (Y == X) {
78: MatScale(Y,1.0 + a);
79: return(0);
80: }
81: MatGetType(X,&t1);
82: MatGetType(Y,&t2);
83: PetscStrcmp(t1,t2,&sametype);
84: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
85: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
86: (*Y->ops->axpy)(Y,a,X,str);
87: } else {
88: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
89: if (transpose) {
90: MatTransposeAXPY_Private(Y,a,X,str,X);
91: } else {
92: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
93: if (transpose) {
94: MatTransposeAXPY_Private(Y,a,X,str,Y);
95: } else {
96: MatAXPY_Basic(Y,a,X,str);
97: }
98: }
99: }
100: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
101: return(0);
102: }
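/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   assembles a small diagonal matrix X, duplicates it into Y, and forms
   Y = 2*X + Y. SAME_NONZERO_PATTERN is safe here because Y was duplicated
   from X; when the patterns are unrelated, DIFFERENT_NONZERO_PATTERN (or
   UNKNOWN_NONZERO_PATTERN) is the conservative choice. The function name
   ExampleAXPY is mine.
*/
static PetscErrorCode ExampleAXPY(void)
{
  PetscErrorCode ierr;
  Mat            X,Y;
  PetscInt       i,rstart,rend,n = 8;

  ierr = MatCreate(PETSC_COMM_WORLD,&X);CHKERRQ(ierr);
  ierr = MatSetSizes(X,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetFromOptions(X);CHKERRQ(ierr);
  ierr = MatSetUp(X);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValue(X,i,i,(PetscScalar)(i+1),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDuplicate(X,MAT_COPY_VALUES,&Y);CHKERRQ(ierr);
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);  /* Y <- 2*X + Y, i.e. 3*X here */
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  ierr = MatDestroy(&Y);CHKERRQ(ierr);
  return 0;
}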
104: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
105: {
107: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
110: /* look for any available faster alternative to the general preallocator */
111: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
112: if (preall) {
113: (*preall)(Y,X,B);
114: } else { /* Use MatPreallocator, assumes the same row/column distribution */
115: Mat preallocator;
116: PetscInt r,rstart,rend;
117: PetscInt m,n,M,N;
119: MatGetRowUpperTriangular(Y);
120: MatGetRowUpperTriangular(X);
121: MatGetSize(Y,&M,&N);
122: MatGetLocalSize(Y,&m,&n);
123: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
124: MatSetType(preallocator,MATPREALLOCATOR);
125: MatSetLayouts(preallocator,Y->rmap,Y->cmap);
126: MatSetUp(preallocator);
127: MatGetOwnershipRange(preallocator,&rstart,&rend);
128: for (r = rstart; r < rend; ++r) {
129: PetscInt ncols;
130: const PetscInt *row;
131: const PetscScalar *vals;
133: MatGetRow(Y,r,&ncols,&row,&vals);
134: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
135: MatRestoreRow(Y,r,&ncols,&row,&vals);
136: MatGetRow(X,r,&ncols,&row,&vals);
137: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
138: MatRestoreRow(X,r,&ncols,&row,&vals);
139: }
140: MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
141: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
142: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
143: MatRestoreRowUpperTriangular(Y);
144: MatRestoreRowUpperTriangular(X);
146: MatCreate(PetscObjectComm((PetscObject)Y),B);
147: MatSetType(*B,((PetscObject)Y)->type_name);
148: MatSetLayouts(*B,Y->rmap,Y->cmap);
149: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
150: MatDestroy(&preallocator);
151: }
152: return(0);
153: }
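/*
   Illustrative sketch (added for exposition, not part of the original axpy.c):
   a matrix implementation can bypass the generic MATPREALLOCATOR pass above by
   composing a "MatAXPYGetPreallocation_C" callback on its objects, which
   MatAXPY_Basic_Preallocate() queries first. The names MyAXPYPreallocation and
   ExampleComposeAXPYPreallocation are hypothetical placeholders.
*/
static PetscErrorCode MyAXPYPreallocation(Mat Y,Mat X,Mat *B)
{
  /* hypothetical placeholder: a real implementation would create *B here and
     preallocate it for the union of the nonzero patterns of Y and X */
  *B = NULL;
  return 0;
}

static PetscErrorCode ExampleComposeAXPYPreallocation(Mat Y)
{
  PetscErrorCode ierr;

  ierr = PetscObjectComposeFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",MyAXPYPreallocation);CHKERRQ(ierr);
  return 0;
}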
155: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
156: {
158: PetscBool isshell,isdense,isnest;
161: MatIsShell(Y,&isshell);
162: if (isshell) { /* MatShell has special support for AXPY */
163: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
165: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
166: if (f) {
167: (*f)(Y,a,X,str);
168: return(0);
169: }
170: }
171: /* no need to preallocate if Y is dense */
172: PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
173: if (isdense) {
174: PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
175: if (isnest) {
176: MatAXPY_Dense_Nest(Y,a,X);
177: return(0);
178: }
179: if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
180: }
181: if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
182: PetscInt i,start,end,j,ncols,m,n;
183: const PetscInt *row;
184: PetscScalar *val;
185: const PetscScalar *vals;
187: MatGetSize(X,&m,&n);
188: MatGetOwnershipRange(X,&start,&end);
189: MatGetRowUpperTriangular(X);
190: if (a == 1.0) {
191: for (i = start; i < end; i++) {
192: MatGetRow(X,i,&ncols,&row,&vals);
193: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
194: MatRestoreRow(X,i,&ncols,&row,&vals);
195: }
196: } else {
197: PetscInt vs = 100;
198: /* realloc if needed, as this function may be used in parallel */
199: PetscMalloc1(vs,&val);
200: for (i=start; i<end; i++) {
201: MatGetRow(X,i,&ncols,&row,&vals);
202: if (vs < ncols) {
203: vs = PetscMin(2*ncols,n);
204: PetscRealloc(vs*sizeof(*val),&val);
205: }
206: for (j=0; j<ncols; j++) val[j] = a*vals[j];
207: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
208: MatRestoreRow(X,i,&ncols,&row,&vals);
209: }
210: PetscFree(val);
211: }
212: MatRestoreRowUpperTriangular(X);
213: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
214: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
215: } else {
216: Mat B;
218: MatAXPY_Basic_Preallocate(Y,X,&B);
219: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
220: /* TODO mat_tests-ex37_nsize-1_mat_type-baij_mat_block_size-2 fails with MatHeaderMerge */
221: MatHeaderReplace(Y,&B);
222: }
223: return(0);
224: }
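/*
   Illustrative sketch (added for exposition, not part of the original axpy.c):
   a MATSHELL can provide its own AXPY, which the MatIsShell() branch above
   picks up through MatGetOperation(). The callback ShellAXPY and the wrapper
   ExampleRegisterShellAXPY are hypothetical; a real callback would update
   whatever user context the shell matrix wraps.
*/
static PetscErrorCode ShellAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  /* hypothetical placeholder: update the shell's user context so that
     applying Y afterwards behaves like a*X + Y */
  return 0;
}

static PetscErrorCode ExampleRegisterShellAXPY(Mat shellY)
{
  PetscErrorCode ierr;

  ierr = MatShellSetOperation(shellY,MATOP_AXPY,(void (*)(void))ShellAXPY);CHKERRQ(ierr);
  return 0;
}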
226: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
227: {
228: PetscInt i,start,end,j,ncols,m,n;
229: PetscErrorCode ierr;
230: const PetscInt *row;
231: PetscScalar *val;
232: const PetscScalar *vals;
235: MatGetSize(X,&m,&n);
236: MatGetOwnershipRange(X,&start,&end);
237: MatGetRowUpperTriangular(Y);
238: MatGetRowUpperTriangular(X);
239: if (a == 1.0) {
240: for (i = start; i < end; i++) {
241: MatGetRow(Y,i,&ncols,&row,&vals);
242: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
243: MatRestoreRow(Y,i,&ncols,&row,&vals);
245: MatGetRow(X,i,&ncols,&row,&vals);
246: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
247: MatRestoreRow(X,i,&ncols,&row,&vals);
248: }
249: } else {
250: PetscInt vs = 100;
251: /* realloc if needed, as this function may be used in parallel */
252: PetscMalloc1(vs,&val);
253: for (i=start; i<end; i++) {
254: MatGetRow(Y,i,&ncols,&row,&vals);
255: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
256: MatRestoreRow(Y,i,&ncols,&row,&vals);
258: MatGetRow(X,i,&ncols,&row,&vals);
259: if (vs < ncols) {
260: vs = PetscMin(2*ncols,n);
261: PetscRealloc(vs*sizeof(*val),&val);
262: }
263: for (j=0; j<ncols; j++) val[j] = a*vals[j];
264: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
265: MatRestoreRow(X,i,&ncols,&row,&vals);
266: }
267: PetscFree(val);
268: }
269: MatRestoreRowUpperTriangular(Y);
270: MatRestoreRowUpperTriangular(X);
271: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
272: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
273: return(0);
274: }
276: /*@
277: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
279: Neighbor-wise Collective on Mat
281: Input Parameters:
282: + Y - the matrix
283: - a - the PetscScalar
285: Level: intermediate
287: Notes:
288: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
289: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
290: entry). No operation is performed when a is zero.
292: To form Y = Y + diag(V) use MatDiagonalSet()
294: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
295: @*/
296: PetscErrorCode MatShift(Mat Y,PetscScalar a)
297: {
302: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
303: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
304: MatCheckPreallocated(Y,1);
305: if (a == 0.0) return(0);
307: if (Y->ops->shift) {
308: (*Y->ops->shift)(Y,a);
309: } else {
310: MatShift_Basic(Y,a);
311: }
313: PetscObjectStateIncrease((PetscObject)Y);
314: return(0);
315: }
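/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   shifts an assembled matrix by 3 on the diagonal. As the Notes above warn,
   this is only fast if every diagonal location is already present in A's
   nonzero pattern. The function name ExampleShift is mine.
*/
static PetscErrorCode ExampleShift(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatShift(A,3.0);CHKERRQ(ierr);  /* A <- A + 3*I */
  return 0;
}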
317: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
318: {
319: PetscErrorCode ierr;
320: PetscInt i,start,end;
321: const PetscScalar *v;
324: MatGetOwnershipRange(Y,&start,&end);
325: VecGetArrayRead(D,&v);
326: for (i=start; i<end; i++) {
327: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
328: }
329: VecRestoreArrayRead(D,&v);
330: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
331: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
332: return(0);
333: }
335: /*@
336: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
337: that is represented as a vector; if the InsertMode is INSERT_VALUES,
338: the diagonal is overwritten instead, i.e. Y[i,i] = D[i].
340: Neighbor-wise Collective on Mat
342: Input Parameters:
343: + Y - the input matrix
344: . D - the diagonal matrix, represented as a vector
345: - is - INSERT_VALUES or ADD_VALUES
347: Notes:
348: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
349: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
350: entry).
352: Level: intermediate
354: .seealso: MatShift(), MatScale(), MatDiagonalScale()
355: @*/
356: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
357: {
359: PetscInt matlocal,veclocal;
364: MatGetLocalSize(Y,&matlocal,NULL);
365: VecGetLocalSize(D,&veclocal);
366: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
367: if (Y->ops->diagonalset) {
368: (*Y->ops->diagonalset)(Y,D,is);
369: } else {
370: MatDiagonalSet_Default(Y,D,is);
371: }
372: PetscObjectStateIncrease((PetscObject)Y);
373: return(0);
374: }
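/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   adds the identity to an assembled matrix via a diagonal vector. The vector is
   created with MatCreateVecs() so its layout matches the rows of Y, which is
   what the local-size check above requires. The function name
   ExampleDiagonalSet is mine.
*/
static PetscErrorCode ExampleDiagonalSet(Mat Y)
{
  PetscErrorCode ierr;
  Vec            D;

  ierr = MatCreateVecs(Y,NULL,&D);CHKERRQ(ierr);        /* left vector: same row layout as Y */
  ierr = VecSet(D,1.0);CHKERRQ(ierr);
  ierr = MatDiagonalSet(Y,D,ADD_VALUES);CHKERRQ(ierr);  /* Y <- Y + I */
  ierr = VecDestroy(&D);CHKERRQ(ierr);
  return 0;
}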
376: /*@
377: MatAYPX - Computes Y = a*Y + X.
379: Logically Collective on Mat
381: Input Parameters:
382: + a - the PetscScalar multiplier
383: . Y - the matrix that is scaled and updated in place
384: . X - the matrix that is added to a*Y
385: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
387: Level: intermediate
389: .seealso: MatAXPY()
390: @*/
391: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
392: {
396: MatScale(Y,a);
397: MatAXPY(Y,1.0,X,str);
398: return(0);
399: }
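/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   forms Y = 0.5*Y + X. Since MatAYPX() is implemented above as MatScale()
   followed by MatAXPY(), the same nonzero-pattern rules as MatAXPY() apply.
   The function name ExampleAYPX is mine.
*/
static PetscErrorCode ExampleAYPX(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  ierr = MatAYPX(Y,0.5,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  /* Y <- 0.5*Y + X */
  return 0;
}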
401: /*@
402: MatComputeOperator - Computes the explicit matrix
404: Collective on Mat
406: Input Parameters:
407: + inmat - the matrix
408: - mattype - the matrix type for the explicit operator
410: Output Parameter:
411: . mat - the explicit operator
413: Notes:
414: This computation is done by applying the operator to columns of the identity matrix.
415: This routine is costly in general, and is recommended for use only with relatively small systems.
416: Currently, this routine uses a dense matrix format if mattype == NULL.
418: Level: advanced
420: @*/
421: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
422: {
428: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
429: return(0);
430: }
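/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   builds the dense explicit form of a matrix-free (e.g. MATSHELL) operator and
   prints it. As the Notes above warn, this is only sensible for small systems.
   The function name ExampleComputeOperator is mine.
*/
static PetscErrorCode ExampleComputeOperator(Mat shell)
{
  PetscErrorCode ierr;
  Mat            dense;

  ierr = MatComputeOperator(shell,MATDENSE,&dense);CHKERRQ(ierr);  /* applies shell to columns of I */
  ierr = MatView(dense,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&dense);CHKERRQ(ierr);
  return 0;
}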
432: /*@
433: MatComputeOperatorTranspose - Computes the explicit matrix representation of
434: a given matrix that can apply MatMultTranspose()
436: Collective on Mat
438: Input Parameter:
439: . inmat - the matrix
441: Output Parameter:
442: . mat - the explicit operator, transposed
444: Notes:
445: This computation is done by applying the transpose of the operator to columns of the identity matrix.
446: This routine is costly in general, and is recommended for use only with relatively small systems.
447: Currently, this routine uses a dense matrix format if mattype == NULL.
449: Level: advanced
451: @*/
452: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
453: {
454: Mat A;
460: MatCreateTranspose(inmat,&A);
461: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
462: MatDestroy(&A);
463: return(0);
464: }
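/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   builds the dense explicit form of the transpose action of an operator that
   implements MatMultTranspose(). The function name
   ExampleComputeOperatorTranspose is mine.
*/
static PetscErrorCode ExampleComputeOperatorTranspose(Mat op)
{
  PetscErrorCode ierr;
  Mat            T;

  ierr = MatComputeOperatorTranspose(op,MATDENSE,&T);CHKERRQ(ierr);
  ierr = MatView(T,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&T);CHKERRQ(ierr);
  return 0;
}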
466: /*@
467: MatChop - Set all values in the matrix whose absolute value is less than the tolerance to zero
469: Input Parameters:
470: + A - The matrix
471: - tol - The zero tolerance
473: Output Parameters:
474: . A - The chopped matrix
476: Level: intermediate
478: .seealso: MatCreate(), MatZeroEntries()
479: @*/
480: PetscErrorCode MatChop(Mat A, PetscReal tol)
481: {
482: Mat a;
483: PetscScalar *newVals;
484: PetscInt *newCols;
485: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
486: PetscBool flg;
490: PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
491: if (flg) {
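/* Comment added for exposition: in this dense branch the local variables are reused with
   different meanings - r holds the leading dimension of the local dense matrix, rStart/rEnd
   hold its row and column counts, and maxRows/colMax serve as the row/column loop indices. */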
492: MatDenseGetLocalMatrix(A, &a);
493: MatDenseGetLDA(a, &r);
494: MatGetSize(a, &rStart, &rEnd);
495: MatDenseGetArray(a, &newVals);
496: for (; colMax < rEnd; ++colMax) {
497: for (maxRows = 0; maxRows < rStart; ++maxRows) {
498: newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
499: }
500: }
501: MatDenseRestoreArray(a, &newVals);
502: } else {
503: MatGetOwnershipRange(A, &rStart, &rEnd);
504: MatGetRowUpperTriangular(A);
505: for (r = rStart; r < rEnd; ++r) {
506: PetscInt ncols;
508: MatGetRow(A, r, &ncols, NULL, NULL);
509: colMax = PetscMax(colMax, ncols);
510: MatRestoreRow(A, r, &ncols, NULL, NULL);
511: }
512: numRows = rEnd - rStart;
513: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
514: PetscMalloc2(colMax,&newCols,colMax,&newVals);
515: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
516: for (r = rStart; r < rStart+maxRows; ++r) {
517: const PetscScalar *vals;
518: const PetscInt *cols;
519: PetscInt ncols, newcols, c;
521: if (r < rEnd) {
522: MatGetRow(A, r, &ncols, &cols, &vals);
523: for (c = 0; c < ncols; ++c) {
524: newCols[c] = cols[c];
525: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
526: }
527: newcols = ncols;
528: MatRestoreRow(A, r, &ncols, &cols, &vals);
529: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
530: }
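/* Comment added for exposition: the assembly calls below are intentionally inside the loop.
   Every rank iterates maxRows times (the global maximum of local row counts), so all ranks
   make the same number of collective MatAssemblyBegin/End calls even when they own fewer rows. */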
531: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
532: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
533: }
534: MatRestoreRowUpperTriangular(A);
535: PetscFree2(newCols,newVals);
536: }
537: return(0);
538: }
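/*
   Illustrative usage sketch (added for exposition, not part of the original axpy.c):
   zeroes, in place, every entry of an assembled matrix whose absolute value is
   below 1e-12. The function name ExampleChop is mine.
*/
static PetscErrorCode ExampleChop(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatChop(A,1.e-12);CHKERRQ(ierr);  /* |a_ij| < 1e-12  ->  a_ij = 0 */
  return 0;
}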