Actual source code: epskrylov.c

slepc-3.14.1 2020-12-08
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Common subroutines for all Krylov-type solvers
 12: */

 14: #include <slepc/private/epsimpl.h>
 15: #include <slepc/private/slepcimpl.h>
 16: #include <slepcblaslapack.h>

 18: /*
 19:    EPSDelayedArnoldi - This function is equivalent to BVMatArnoldi but
 20:    performs the computation in a different way. The main idea is that
 21:    reorthogonalization is delayed to the next Arnoldi step. This version is
 22:    more scalable but in some cases convergence may stagnate.
 23: */
 24: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 25: {
 27:   PetscInt       i,j,m=*M;
 28:   Vec            u,t;
 29:   PetscScalar    shh[100],*lhh,dot,dot2;
 30:   PetscReal      norm1=0.0,norm2=1.0;
 31:   Vec            vj,vj1,vj2;

 34:   if (m<=100) lhh = shh;
 35:   else {
 36:     PetscMalloc1(m,&lhh);
 37:   }
 38:   BVCreateVec(eps->V,&u);
 39:   BVCreateVec(eps->V,&t);

 41:   BVSetActiveColumns(eps->V,0,m);
 42:   for (j=k;j<m;j++) {
 43:     BVGetColumn(eps->V,j,&vj);
 44:     BVGetColumn(eps->V,j+1,&vj1);
 45:     STApply(eps->st,vj,vj1);
 46:     BVRestoreColumn(eps->V,j,&vj);
 47:     BVRestoreColumn(eps->V,j+1,&vj1);

 49:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
 50:     if (j>k) {
 51:       BVDotColumnBegin(eps->V,j,lhh);
 52:       BVGetColumn(eps->V,j,&vj);
 53:       VecDotBegin(vj,vj,&dot);
 54:     }
 55:     if (j>k+1) {
 56:       BVNormVecBegin(eps->V,u,NORM_2,&norm2);
 57:       BVGetColumn(eps->V,j-2,&vj2);
 58:       VecDotBegin(u,vj2,&dot2);
 59:     }

 61:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
 62:     if (j>k) {
 63:       BVDotColumnEnd(eps->V,j,lhh);
 64:       VecDotEnd(vj,vj,&dot);
 65:       BVRestoreColumn(eps->V,j,&vj);
 66:     }
 67:     if (j>k+1) {
 68:       BVNormVecEnd(eps->V,u,NORM_2,&norm2);
 69:       VecDotEnd(u,vj2,&dot2);
 70:       BVRestoreColumn(eps->V,j-2,&vj2);
 71:     }

 73:     if (j>k) {
 74:       norm1 = PetscSqrtReal(PetscRealPart(dot));
 75:       for (i=0;i<j;i++)
 76:         H[ldh*j+i] = H[ldh*j+i]/norm1;
 77:       H[ldh*j+j] = H[ldh*j+j]/dot;

 79:       BVCopyVec(eps->V,j,t);
 80:       BVScaleColumn(eps->V,j,1.0/norm1);
 81:       BVScaleColumn(eps->V,j+1,1.0/norm1);
 82:     }

 84:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);

 86:     if (j>k) {
 87:       BVSetActiveColumns(eps->V,0,j);
 88:       BVMultVec(eps->V,-1.0,1.0,t,lhh);
 89:       BVSetActiveColumns(eps->V,0,m);
 90:       for (i=0;i<j;i++)
 91:         H[ldh*(j-1)+i] += lhh[i];
 92:     }

 94:     if (j>k+1) {
 95:       BVGetColumn(eps->V,j-1,&vj1);
 96:       VecCopy(u,vj1);
 97:       BVRestoreColumn(eps->V,j-1,&vj1);
 98:       BVScaleColumn(eps->V,j-1,1.0/norm2);
 99:       H[ldh*(j-2)+j-1] = norm2;
100:     }

102:     if (j<m-1) {
103:       VecCopy(t,u);
104:     }
105:   }

107:   BVNormVec(eps->V,t,NORM_2,&norm2);
108:   VecScale(t,1.0/norm2);
109:   BVGetColumn(eps->V,m-1,&vj1);
110:   VecCopy(t,vj1);
111:   BVRestoreColumn(eps->V,m-1,&vj1);
112:   H[ldh*(m-2)+m-1] = norm2;

114:   BVDotColumn(eps->V,m,lhh);

116:   BVMultColumn(eps->V,-1.0,1.0,m,lhh);
117:   for (i=0;i<m;i++)
118:     H[ldh*(m-1)+i] += lhh[i];

120:   BVNormColumn(eps->V,m,NORM_2,beta);
121:   BVScaleColumn(eps->V,m,1.0 / *beta);
122:   *breakdown = PETSC_FALSE;

124:   if (m>100) { PetscFree(lhh); }
125:   VecDestroy(&u);
126:   VecDestroy(&t);
127:   return(0);
128: }

130: /*
131:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
132:    but without reorthogonalization (only delayed normalization).
133: */
134: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
135: {
137:   PetscInt       i,j,m=*M;
138:   PetscScalar    dot;
139:   PetscReal      norm=0.0;
140:   Vec            vj,vj1;

143:   BVSetActiveColumns(eps->V,0,m);
144:   for (j=k;j<m;j++) {
145:     BVGetColumn(eps->V,j,&vj);
146:     BVGetColumn(eps->V,j+1,&vj1);
147:     STApply(eps->st,vj,vj1);
148:     BVRestoreColumn(eps->V,j+1,&vj1);
149:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
150:     if (j>k) {
151:       VecDotBegin(vj,vj,&dot);
152:     }
153:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
154:     if (j>k) {
155:       VecDotEnd(vj,vj,&dot);
156:     }
157:     BVRestoreColumn(eps->V,j,&vj);

159:     if (j>k) {
160:       norm = PetscSqrtReal(PetscRealPart(dot));
161:       BVScaleColumn(eps->V,j,1.0/norm);
162:       H[ldh*(j-1)+j] = norm;

164:       for (i=0;i<j;i++)
165:         H[ldh*j+i] = H[ldh*j+i]/norm;
166:       H[ldh*j+j] = H[ldh*j+j]/dot;
167:       BVScaleColumn(eps->V,j+1,1.0/norm);
168:       *beta = norm;
169:     }
170:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
171:   }

173:   *breakdown = PETSC_FALSE;
174:   return(0);
175: }

177: /*
178:    EPSKrylovConvergence_Filter - Specialized version for STFILTER.
179: */
180: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
181: {
183:   PetscInt       k,ninside,nconv;
184:   PetscScalar    re,im;
185:   PetscReal      resnorm;

188:   ninside = 0;   /* count how many eigenvalues are located in the interval */
189:   for (k=kini;k<kini+nits;k++) {
190:     if (PetscRealPart(eps->eigr[k]) < gamma) break;
191:     ninside++;
192:   }
193:   eps->nev = ninside+kini;  /* adjust eigenvalue count */
194:   nconv = 0;   /* count how many eigenvalues satisfy the convergence criterion */
195:   for (k=kini;k<kini+ninside;k++) {
196:     /* eigenvalue */
197:     re = eps->eigr[k];
198:     im = eps->eigi[k];
199:     DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
200:     resnorm *= beta;
201:     /* error estimate */
202:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
203:     if (eps->errest[k] < eps->tol) nconv++;
204:     else break;
205:   }
206:   *kout = kini+nconv;
207:   PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
208:   return(0);
209: }

211: /*
212:    EPSKrylovConvergence - Implements the loop that checks for convergence
213:    in Krylov methods.

215:    Input Parameters:
216:      eps   - the eigensolver; some error estimates are updated in eps->errest
217:      getall - whether all residuals must be computed
218:      kini  - initial value of k (the loop variable)
219:      nits  - number of iterations of the loop
220:      V     - set of basis vectors (used only if trueresidual is activated)
221:      nv    - number of vectors to process (dimension of Q, columns of V)
222:      beta  - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
223:      corrf - correction factor for residual estimates (only in harmonic KS)

225:    Output Parameters:
226:      kout  - the first index where the convergence test failed
227: */
228: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal betat,PetscReal corrf,PetscInt *kout)
229: {
231:   PetscInt       k,newk,newk2,marker,ld,inside;
232:   PetscScalar    re,im,*Zr,*Zi,*X;
233:   PetscReal      resnorm,gamma,lerrest;
234:   PetscBool      isshift,isfilter,refined,istrivial;
235:   Vec            x=NULL,y=NULL,w[3];

238:   if (eps->which == EPS_ALL) {
239:     PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
240:     if (isfilter) {
241:       STFilterGetThreshold(eps->st,&gamma);
242:       EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
243:       return(0);
244:     }
245:   }
246:   RGIsTrivial(eps->rg,&istrivial);
247:   if (eps->trueres) {
248:     BVCreateVec(eps->V,&x);
249:     BVCreateVec(eps->V,&y);
250:     BVCreateVec(eps->V,&w[0]);
251:     BVCreateVec(eps->V,&w[2]);
252: #if !defined(PETSC_USE_COMPLEX)
253:     BVCreateVec(eps->V,&w[1]);
254: #else
255:     w[1] = NULL;
256: #endif
257:   }
258:   DSGetLeadingDimension(eps->ds,&ld);
259:   DSGetRefined(eps->ds,&refined);
260:   PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
261:   marker = -1;
262:   if (eps->trackall) getall = PETSC_TRUE;
263:   for (k=kini;k<kini+nits;k++) {
264:     /* eigenvalue */
265:     re = eps->eigr[k];
266:     im = eps->eigi[k];
267:     if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
268:       STBackTransform(eps->st,1,&re,&im);
269:     }
270:     if (!istrivial) {
271:       RGCheckInside(eps->rg,1,&re,&im,&inside);
272:       if (marker==-1 && inside<0) marker = k;
273:       if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) {  /* make sure eps->converged below uses the right value */
274:         re = eps->eigr[k];
275:         im = eps->eigi[k];
276:       }
277:     }
278:     newk = k;
279:     DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
280:     if (eps->trueres) {
281:       DSGetArray(eps->ds,DS_MAT_X,&X);
282:       Zr = X+k*ld;
283:       if (newk==k+1) Zi = X+newk*ld;
284:       else Zi = NULL;
285:       EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
286:       DSRestoreArray(eps->ds,DS_MAT_X,&X);
287:       EPSComputeResidualNorm_Private(eps,PETSC_FALSE,re,im,x,y,w,&resnorm);
288:     }
289:     else if (!refined) resnorm *= beta*corrf;
290:     /* error estimate */
291:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
292:     if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
293:     if (eps->twosided) {
294:       newk2 = k;
295:       DSVectors(eps->dsts,DS_MAT_X,&newk2,&resnorm);
296:       resnorm *= betat;
297:       (*eps->converged)(eps,re,im,resnorm,&lerrest,eps->convergedctx);
298:       eps->errest[k] = PetscMax(eps->errest[k],lerrest);
299:       if (marker==-1 && lerrest >= eps->tol) marker = k;
300:     }
301:     if (newk==k+1) {
302:       eps->errest[k+1] = eps->errest[k];
303:       k++;
304:     }
305:     if (marker!=-1 && !getall) break;
306:   }
307:   if (marker!=-1) k = marker;
308:   *kout = k;
309:   if (eps->trueres) {
310:     VecDestroy(&x);
311:     VecDestroy(&y);
312:     VecDestroy(&w[0]);
313:     VecDestroy(&w[2]);
314: #if !defined(PETSC_USE_COMPLEX)
315:     VecDestroy(&w[1]);
316: #endif
317:   }
318:   return(0);
319: }

321: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
322: {
324:   PetscInt       j,m = *M,i,ld,l;
325:   Vec            vj,vj1;
326:   PetscScalar    *hwork,lhwork[100];
327:   PetscReal      norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
328:   PetscBLASInt   j_,one=1;

331:   DSGetLeadingDimension(eps->ds,&ld);
332:   DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
333:   if (cos) *cos = 1.0;
334:   if (m > 100) {
335:     PetscMalloc1(m,&hwork);
336:   } else hwork = lhwork;

338:   BVSetActiveColumns(eps->V,0,m);
339:   for (j=k;j<m;j++) {
340:     BVGetColumn(eps->V,j,&vj);
341:     BVGetColumn(eps->V,j+1,&vj1);
342:     STApply(eps->st,vj,vj1);
343:     BVRestoreColumn(eps->V,j,&vj);
344:     BVRestoreColumn(eps->V,j+1,&vj1);
345:     BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
346:     alpha[j] = PetscRealPart(hwork[j]);
347:     beta[j] = PetscAbsReal(norm);
348:     DSGetArrayReal(eps->ds,DS_MAT_T,&f);
349:     if (j==k) {
350:       for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
351:       for (i=0;i<l;i++) hwork[i] = 0.0;
352:     }
353:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
354:     hwork[j-1] -= beta[j-1];
355:     PetscBLASIntCast(j,&j_);
356:     sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
357:     fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
358:     if (j>0) fro = SlepcAbs(fro,beta[j-1]);
359:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
360:     omega[j+1] = (norm<0.0)? -1.0: 1.0;
361:     BVScaleColumn(eps->V,j+1,1.0/norm);
362:     /* */
363:     if (cos) {
364:       BVGetColumn(eps->V,j+1,&vj1);
365:       VecNorm(vj1,NORM_2,&norm1);
366:       BVApplyMatrix(eps->V,vj1,w);
367:       BVRestoreColumn(eps->V,j+1,&vj1);
368:       VecNorm(w,NORM_2,&norm2);
369:       t = 1.0/(norm1*norm2);
370:       if (*cos>t) *cos = t;
371:     }
372:   }
373:   if (m > 100) {
374:     PetscFree(hwork);
375:   }
376:   return(0);
377: }