Actual source code: fninvsqrt.c

slepc-3.17.2 2022-08-09
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Inverse square root function  x^(-1/2)
 12: */

 14: #include <slepc/private/fnimpl.h>
 15: #include <slepcblaslapack.h>

 17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 18: {
 20: #if !defined(PETSC_USE_COMPLEX)
 22: #endif
 23:   *y = 1.0/PetscSqrtScalar(x);
 24:   PetscFunctionReturn(0);
 25: }

 27: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 28: {
 30: #if !defined(PETSC_USE_COMPLEX)
 32: #endif
 33:   *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
 34:   PetscFunctionReturn(0);
 35: }

 37: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
 38: {
 39:   PetscBLASInt   n=0,ld,*ipiv,info;
 40:   PetscScalar    *Ba,*Wa;
 41:   PetscInt       m;
 42:   Mat            W;

 44:   FN_AllocateWorkMat(fn,A,&W);
 45:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
 46:   MatDenseGetArray(B,&Ba);
 47:   MatDenseGetArray(W,&Wa);
 48:   /* compute B = sqrtm(A) */
 49:   MatGetSize(A,&m,NULL);
 50:   PetscBLASIntCast(m,&n);
 51:   ld = n;
 52:   FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
 53:   /* compute B = A\B */
 54:   PetscMalloc1(ld,&ipiv);
 55:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
 56:   SlepcCheckLapackInfo("gesv",info);
 57:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
 58:   PetscFree(ipiv);
 59:   MatDenseRestoreArray(W,&Wa);
 60:   MatDenseRestoreArray(B,&Ba);
 61:   FN_FreeWorkMat(fn,&W);
 62:   PetscFunctionReturn(0);
 63: }

 65: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
 66: {
 67:   PetscBLASInt   n=0,ld,*ipiv,info,one=1;
 68:   PetscScalar    *Ba,*Wa;
 69:   PetscInt       m;
 70:   Mat            B,W;

 72:   FN_AllocateWorkMat(fn,A,&B);
 73:   FN_AllocateWorkMat(fn,A,&W);
 74:   MatDenseGetArray(B,&Ba);
 75:   MatDenseGetArray(W,&Wa);
 76:   /* compute B_1 = sqrtm(A)*e_1 */
 77:   MatGetSize(A,&m,NULL);
 78:   PetscBLASIntCast(m,&n);
 79:   ld = n;
 80:   FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
 81:   /* compute B_1 = A\B_1 */
 82:   PetscMalloc1(ld,&ipiv);
 83:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
 84:   SlepcCheckLapackInfo("gesv",info);
 85:   PetscFree(ipiv);
 86:   MatDenseRestoreArray(W,&Wa);
 87:   MatDenseRestoreArray(B,&Ba);
 88:   MatGetColumnVector(B,v,0);
 89:   FN_FreeWorkMat(fn,&W);
 90:   FN_FreeWorkMat(fn,&B);
 91:   PetscFunctionReturn(0);
 92: }

 94: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
 95: {
 96:   PetscBLASInt   n=0;
 97:   PetscScalar    *T;
 98:   PetscInt       m;

100:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
101:   MatDenseGetArray(B,&T);
102:   MatGetSize(A,&m,NULL);
103:   PetscBLASIntCast(m,&n);
104:   FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
105:   MatDenseRestoreArray(B,&T);
106:   PetscFunctionReturn(0);
107: }

109: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
110: {
111:   PetscBLASInt   n=0;
112:   PetscScalar    *T;
113:   PetscInt       m;

115:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
116:   MatDenseGetArray(B,&T);
117:   MatGetSize(A,&m,NULL);
118:   PetscBLASIntCast(m,&n);
119:   FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
120:   MatDenseRestoreArray(B,&T);
121:   PetscFunctionReturn(0);
122: }

124: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
125: {
126:   PetscBLASInt   n=0,ld,*ipiv,info;
127:   PetscScalar    *Ba,*Wa;
128:   PetscInt       m;
129:   Mat            W;

131:   FN_AllocateWorkMat(fn,A,&W);
132:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
133:   MatDenseGetArray(B,&Ba);
134:   MatDenseGetArray(W,&Wa);
135:   /* compute B = sqrtm(A) */
136:   MatGetSize(A,&m,NULL);
137:   PetscBLASIntCast(m,&n);
138:   ld = n;
139:   FNSqrtmSadeghi(fn,n,Ba,n);
140:   /* compute B = A\B */
141:   PetscMalloc1(ld,&ipiv);
142:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
143:   SlepcCheckLapackInfo("gesv",info);
144:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
145:   PetscFree(ipiv);
146:   MatDenseRestoreArray(W,&Wa);
147:   MatDenseRestoreArray(B,&Ba);
148:   FN_FreeWorkMat(fn,&W);
149:   PetscFunctionReturn(0);
150: }

152: #if defined(PETSC_HAVE_CUDA)
153: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
154: {
155:   PetscBLASInt   n=0;
156:   PetscScalar    *Ba;
157:   PetscInt       m;

159:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
160:   MatDenseGetArray(B,&Ba);
161:   MatGetSize(A,&m,NULL);
162:   PetscBLASIntCast(m,&n);
163:   FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
164:   MatDenseRestoreArray(B,&Ba);
165:   PetscFunctionReturn(0);
166: }

168: #if defined(PETSC_HAVE_MAGMA)
169: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
170: {
171:   PetscBLASInt   n=0;
172:   PetscScalar    *T;
173:   PetscInt       m;

175:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
176:   MatDenseGetArray(B,&T);
177:   MatGetSize(A,&m,NULL);
178:   PetscBLASIntCast(m,&n);
179:   FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
180:   MatDenseRestoreArray(B,&T);
181:   PetscFunctionReturn(0);
182: }

184: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
185: {
186:   PetscBLASInt   n=0,ld,*ipiv,info;
187:   PetscScalar    *Ba,*Wa;
188:   PetscInt       m;
189:   Mat            W;

191:   FN_AllocateWorkMat(fn,A,&W);
192:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
193:   MatDenseGetArray(B,&Ba);
194:   MatDenseGetArray(W,&Wa);
195:   /* compute B = sqrtm(A) */
196:   MatGetSize(A,&m,NULL);
197:   PetscBLASIntCast(m,&n);
198:   ld = n;
199:   FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
200:   /* compute B = A\B */
201:   PetscMalloc1(ld,&ipiv);
202:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
203:   SlepcCheckLapackInfo("gesv",info);
204:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
205:   PetscFree(ipiv);
206:   MatDenseRestoreArray(W,&Wa);
207:   MatDenseRestoreArray(B,&Ba);
208:   FN_FreeWorkMat(fn,&W);
209:   PetscFunctionReturn(0);
210: }
211: #endif /* PETSC_HAVE_MAGMA */
212: #endif /* PETSC_HAVE_CUDA */

214: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
215: {
216:   PetscBool      isascii;
217:   char           str[50];
218:   const char     *methodname[] = {
219:                   "Schur method for inv(A)*sqrtm(A)",
220:                   "Denman-Beavers (product form)",
221:                   "Newton-Schulz iteration",
222:                   "Sadeghi iteration"
223: #if defined(PETSC_HAVE_CUDA)
224:                  ,"Newton-Schulz iteration CUDA"
225: #if defined(PETSC_HAVE_MAGMA)
226:                  ,"Denman-Beavers (product form) CUDA/MAGMA",
227:                   "Sadeghi iteration CUDA/MAGMA"
228: #endif
229: #endif
230:   };
231:   const int      nmeth=sizeof(methodname)/sizeof(methodname[0]);

233:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
234:   if (isascii) {
235:     if (fn->beta==(PetscScalar)1.0) {
236:       if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer,"  Inverse square root: x^(-1/2)\n");
237:       else {
238:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
239:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: (%s*x)^(-1/2)\n",str);
240:       }
241:     } else {
242:       SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
243:       if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer,"  Inverse square root: %s*x^(-1/2)\n",str);
244:       else {
245:         PetscViewerASCIIPrintf(viewer,"  Inverse square root: %s",str);
246:         PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
247:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
248:         PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
249:         PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
250:       }
251:     }
252:     if (fn->method<nmeth) PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]);
253:   }
254:   PetscFunctionReturn(0);
255: }

257: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
258: {
259:   fn->ops->evaluatefunction          = FNEvaluateFunction_Invsqrt;
260:   fn->ops->evaluatederivative        = FNEvaluateDerivative_Invsqrt;
261:   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Invsqrt_Schur;
262:   fn->ops->evaluatefunctionmat[1]    = FNEvaluateFunctionMat_Invsqrt_DBP;
263:   fn->ops->evaluatefunctionmat[2]    = FNEvaluateFunctionMat_Invsqrt_NS;
264:   fn->ops->evaluatefunctionmat[3]    = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
265: #if defined(PETSC_HAVE_CUDA)
266:   fn->ops->evaluatefunctionmat[4]    = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
267: #if defined(PETSC_HAVE_MAGMA)
268:   fn->ops->evaluatefunctionmat[5]    = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
269:   fn->ops->evaluatefunctionmat[6]    = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
270: #endif /* PETSC_HAVE_MAGMA */
271: #endif /* PETSC_HAVE_CUDA */
272:   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
273:   fn->ops->view                      = FNView_Invsqrt;
274:   PetscFunctionReturn(0);
275: }