Actual source code: bcgsl.c

  1: #define PETSCKSP_DLL
  2: /*
  3:  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  4:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  5:  * of equations". This uses tricky delayed updating ideas to prevent
  6:  * round-off buildup.
  7:  */
 8:  #include petscblaslapack.h
 9:  #include include/private/kspimpl.h
 10:  #include bcgsl.h


 15: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 16: {
 17:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *) ksp->data;
 18:   PetscScalar    alpha, beta, nu, omega, sigma;
 19:   PetscScalar    rho0, rho1;
 20:   PetscReal      kappa0, kappaA, kappa1;
 21:   PetscReal      ghat, epsilon, abstol;
 22:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 23:   PetscTruth     bUpdateX;
 24:   PetscTruth     bBombed = PETSC_FALSE;

 26:   PetscInt       maxit;
 27:   PetscInt       h, i, j, k, vi, ell;
 28:   PetscBLASInt   ldMZ,bierr;


 33:   /* set up temporary vectors */
 34:   vi = 0;
 35:   ell = bcgsl->ell;
 36:   bcgsl->vB    = ksp->work[vi]; vi++;
 37:   bcgsl->vRt   = ksp->work[vi]; vi++;
 38:   bcgsl->vTm   = ksp->work[vi]; vi++;
 39:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 40:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 41:   bcgsl->vXr   = ksp->work[vi]; vi++;
 42:   ldMZ = ell+1;
 43:   {
 44:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
 45:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
 46:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
 47:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
 48:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
 49:   }

 51:   /* Prime the iterative solver */
 52:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 53:   VecNorm(VVR[0], NORM_2, &zeta0);
 54:   rnmax_computed = zeta0;
 55:   rnmax_true = zeta0;

 57:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 58:   if (ksp->reason) {
 59:     PetscObjectTakeAccess(ksp);
 60:     ksp->its   = 0;
 61:     ksp->rnorm = zeta0;
 62:     PetscObjectGrantAccess(ksp);
 63:     PetscFree(AY0c);
 64:     PetscFree(AYlc);
 65:     PetscFree(AYtc);
 66:     PetscFree(MZa);
 67:     PetscFree(MZb);

 69:     return(0);
 70:   }

 72:   VecSet(VVU[0],0.0);
 73:   alpha = 0;
 74:   rho0 = omega = 1;

 76:   if (bcgsl->delta>0.0) {
 77:     VecCopy(VX, VXR);
 78:     VecSet(VX,0.0);
 79:     VecCopy(VVR[0], VB);
 80:   } else {
 81:     VecCopy(ksp->vec_rhs, VB);
 82:   }

 84:   /* Life goes on */
 85:   VecCopy(VVR[0], VRT);
 86:   zeta = zeta0;

 88:   KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);

 90:   for (k=0; k<maxit; k += bcgsl->ell) {
 91:     PetscObjectTakeAccess(ksp);
 92:     ksp->its   = k;
 93:     ksp->rnorm = zeta;
 94:     PetscObjectGrantAccess(ksp);

 96:     KSPLogResidualHistory(ksp, zeta);
 97:     KSPMonitor(ksp, ksp->its, zeta);

 99:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
100:     if (ksp->reason) break;

102:     /* BiCG part */
103:     rho0 = -omega*rho0;
104:     nrm0 = zeta;
105:     for (j=0; j<bcgsl->ell; j++) {
106:       /* rho1 <- r_j' * r_tilde */
107:       VecDot(VVR[j], VRT, &rho1);
108:       if (rho1 == 0.0) {
109:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
110:         bBombed = PETSC_TRUE;
111:         break;
112:       }
113:       beta = alpha*(rho1/rho0);
114:       rho0 = rho1;
115:       nu = -beta;
116:       for (i=0; i<=j; i++) {
117:         /* u_i <- r_i - beta*u_i */
118:         VecAYPX(VVU[i], nu, VVR[i]);
119:       }
120:       /* u_{j+1} <- inv(K)*A*u_j */
121:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

123:       VecDot(VVU[j+1], VRT, &sigma);
124:       if (sigma == 0.0) {
125:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
126:         bBombed = PETSC_TRUE;
127:         break;
128:       }
129:       alpha = rho1/sigma;

131:       /* x <- x + alpha*u_0 */
132:       VecAXPY(VX, alpha, VVU[0]);

134:       nu = -alpha;
135:       for (i=0; i<=j; i++) {
136:         /* r_i <- r_i - alpha*u_{i+1} */
137:         VecAXPY(VVR[i], nu, VVU[i+1]);
138:       }

140:       /* r_{j+1} <- inv(K)*A*r_j */
141:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

143:       VecNorm(VVR[0], NORM_2, &nrm0);
144:       if (bcgsl->delta>0.0) {
145:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
146:         if (rnmax_true<nrm0) rnmax_true = nrm0;
147:       }

149:       /* NEW: check for early exit */
150:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
151:       if (ksp->reason) {
152:         PetscObjectTakeAccess(ksp);
153:         ksp->its   = k+j;
154:         ksp->rnorm = nrm0;
155:         PetscObjectGrantAccess(ksp);
156:         break;
157:       }
158:     }

160:     if (bBombed==PETSC_TRUE) break;

162:     /* Polynomial part */

164:     for (i=0; i<=bcgsl->ell; i++) {
165:       for (j=0; j<i; j++) {
166:         VecDot(VVR[j], VVR[i], &nu);
167:         MZa[i+ldMZ*j] = nu;
168:         MZa[j+ldMZ*i] = nu;
169:         MZb[i+ldMZ*j] = nu;
170:         MZb[j+ldMZ*i] = nu;
171:       }

173:       VecDot(VVR[i], VVR[i], &nu);
174:       MZa[i+ldMZ*i] = nu;
175:       MZb[i+ldMZ*i] = nu;
176:     }

178:     if (!bcgsl->bConvex || bcgsl->ell==1) {
179:       PetscBLASInt ione = 1,bell = bcgsl->ell;

181:       AY0c[0] = -1;
182:       LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
183:       if (ierr!=0) {
184:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
185:         bBombed = PETSC_TRUE;
186:         break;
187:       }
188:       BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
189:       LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
190:     } else {
191:       PetscBLASInt neqs = bcgsl->ell-1;
192:       PetscBLASInt ione = 1;
193:       PetscScalar aone = 1.0, azero = 0.0;

195:       LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
196:       if (ierr!=0) {
197:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
198:         bBombed = PETSC_TRUE;
199:         break;
200:       }
201:       BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
202:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
203:       AY0c[0] = -1;
204:       AY0c[bcgsl->ell] = 0;

206:       BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
207:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

209:       AYlc[0] = 0;
210:       AYlc[bcgsl->ell] = -1;

212:       BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

214:       kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

216:       /* round-off can cause negative kappa's */
217:       if (kappa0<0) kappa0 = -kappa0;
218:       kappa0 = sqrt(kappa0);

220:       kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

222:       BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

224:       kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

226:       if (kappa1<0) kappa1 = -kappa1;
227:       kappa1 = sqrt(kappa1);

229:       if (kappa0!=0.0 && kappa1!=0.0) {
230:         if (kappaA<0.7*kappa0*kappa1) {
231:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
232:         } else {
233:           ghat = kappaA/(kappa1*kappa1);
234:         }
235:         for (i=0; i<=bcgsl->ell; i++) {
236:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
237:         }
238:       }
239:     }

241:     omega = AY0c[bcgsl->ell];
242:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
243:       omega = AY0c[h];
244:     }
245:     if (omega==0.0) {
246:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
247:       break;
248:     }

250:     for (i=1; i<=bcgsl->ell; i++) {
251:       nu = -AY0c[i];
252:       VecAXPY(VVU[0], nu, VVU[i]);
253:       nu = AY0c[i];
254:       VecAXPY(VX, nu, VVR[i-1]);
255:       nu = -AY0c[i];
256:       VecAXPY(VVR[0], nu, VVR[i]);
257:     }

259:     VecNorm(VVR[0], NORM_2, &zeta);

261:     /* Accurate Update */
262:     if (bcgsl->delta>0.0) {
263:       if (rnmax_computed<zeta) rnmax_computed = zeta;
264:       if (rnmax_true<zeta) rnmax_true = zeta;

266:       bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
267:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
268:         /* r0 <- b-inv(K)*A*X */
269:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
270:         nu = -1;
271:         VecAYPX(VVR[0], nu, VB);
272:         rnmax_true = zeta;

274:         if (bUpdateX) {
275:           nu = 1;
276:           VecAXPY(VXR,nu,VX);
277:           VecSet(VX,0.0);
278:           VecCopy(VVR[0], VB);
279:           rnmax_computed = zeta;
280:         }
281:       }
282:     }
283:   }

285:   KSPMonitor(ksp, ksp->its, zeta);

287:   if (bcgsl->delta>0.0) {
288:     nu   = 1;
289:     VecAXPY(VX,nu,VXR);
290:   }

292:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
293:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

295:   PetscFree(AY0c);
296:   PetscFree(AYlc);
297:   PetscFree(AYtc);
298:   PetscFree(MZa);
299:   PetscFree(MZb);
300:   return(0);
301: }

305: /*@C
306:    KSPBCGSLSetXRes - Sets the parameter governing when
307:    exact residuals will be used instead of computed residuals.

309:    Collective on KSP

311:    Input Parameters:
312: +  ksp - iterative context obtained from KSPCreate
313: -  delta - computed residuals are used alone when delta is not positive

315:    Options Database Keys:

317: .  -ksp_bcgsl_xres delta

319:    Level: intermediate

321: .keywords: KSP, BiCGStab(L), set, exact residuals

323: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
324: @*/
325: PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
326: {
327:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

331:   if (ksp->setupcalled) {
332:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
333:       KSPDefaultFreeWork(ksp);
334:       ksp->setupcalled = 0;
335:     }
336:   }
337:   bcgsl->delta = delta;
338:   return(0);
339: }

343: /*@C
344:    KSPBCGSLSetPol - Sets the type of polynomial part will
345:    be used in the BiCGSTab(L) solver.

347:    Collective on KSP

349:    Input Parameters:
350: +  ksp - iterative context obtained from KSPCreate
351: -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

353:    Options Database Keys:

355: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
356: .  -ksp_bcgsl_mrpoly - use standard polynomial

358:    Level: intermediate

360: .keywords: KSP, BiCGStab(L), set, polynomial

362: .seealso: @()
363: @*/
364: PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
365: {
366:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

370:   if (!ksp->setupcalled) {
371:     bcgsl->bConvex = uMROR;
372:   } else if (bcgsl->bConvex != uMROR) {
373:     /* free the data structures,
374:        then create them again
375:      */
376:    KSPDefaultFreeWork(ksp);
377:     bcgsl->bConvex = uMROR;
378:     ksp->setupcalled = 0;
379:   }
380:   return(0);
381: }

385: /*@C
386:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

388:    Collective on KSP

390:    Input Parameters:
391: +  ksp - iterative context obtained from KSPCreate
392: -  ell - number of search directions

394:    Options Database Keys:

396: .  -ksp_bcgsl_ell ell

398:    Level: intermediate

400: .keywords: KSP, BiCGStab(L), set, exact residuals,

402: .seealso: @()
403: @*/
404: PetscErrorCode  KSPBCGSLSetEll(KSP ksp, int ell)
405: {
406:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

410:   if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

412:   if (!ksp->setupcalled) {
413:     bcgsl->ell = ell;
414:   } else if (bcgsl->ell != ell) {
415:     /* free the data structures, then create them again */
416:     KSPDefaultFreeWork(ksp);

418:     bcgsl->ell = ell;
419:     ksp->setupcalled = 0;
420:   }
421:   return(0);
422: }

426: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
427: {
428:   KSP_BiCGStabL       *bcgsl = (KSP_BiCGStabL *)ksp->data;
429:   PetscErrorCode      ierr;
430:   PetscTruth          isascii, isstring;

433:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
434:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);

436:   if (isascii) {
437:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
438:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
439:   } else {
440:     SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
441:   }
442:   return(0);
443: }

447: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
448: {
449:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
451:   PetscInt       this_ell;
452:   PetscReal      delta;
453:   PetscTruth     flga, flg;

456:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
457:      don't need to be called here.
458:   */
459:   PetscOptionsHead("KSP BiCGStab(L) Options");

461:   /* Set number of search directions */
462:   PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
463:   if (flg) {
464:     KSPBCGSLSetEll(ksp, this_ell);
465:   }

467:   /* Set polynomial type */
468:   PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
469:   if (flga) {
470:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
471:   } else {
472:     PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
473:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
474:   }

476:   /* Will computed residual be refreshed? */
477:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
478:   if (flg) {
479:     KSPBCGSLSetXRes(ksp, delta);
480:   }
481:   PetscOptionsTail();
482:   return(0);
483: }

487: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
488: {
489:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
490:   PetscInt        ell = bcgsl->ell;

494:   /* Support left preconditioners only */
495:   if (ksp->pc_side == PC_SYMMETRIC) {
496:     SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
497:   } else if (ksp->pc_side == PC_RIGHT) {
498:     SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
499:   }
500:   KSPDefaultGetWork(ksp, 6+2*ell);
501:   return(0);
502: }

504: /*MC
505:      KSPBCGSL - Implements a slight variant of the Enhanced
506:                 BiCGStab(L) algorithm in (3) and (2).  The variation
507:                 concerns cases when either kappa0**2 or kappa1**2 is
508:                 negative due to round-off. Kappa0 has also been pulled
509:                 out of the denominator in the formula for ghat.

511:     References:
512:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
513:          approaches for the stable computation of hybrid BiCG
514:          methods", Applied Numerical Mathematics: Transactions
515:          f IMACS, 19(3), pp 235-54, 1996.
516:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
517:          "BiCGStab(L) and other hybrid Bi-CG methods",
518:           Numerical Algorithms, 7, pp 75-109, 1994.
519:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
520:          for solving linear systems of equations", preprint
521:          from www.citeseer.com.

523:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

525:    Options Database Keys:
526: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
527: -  -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
528: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

530:    Level: beginner

532: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

534: M*/
538: PetscErrorCode  KSPCreate_BCGSL(KSP ksp)
539: {
541:   KSP_BiCGStabL  *bcgsl;

544:   /* allocate BiCGStab(L) context */
545:   PetscNew(KSP_BiCGStabL, &bcgsl);
546:   ksp->data = (void*)bcgsl;

548:   ksp->pc_side              = PC_LEFT;
549:   ksp->ops->setup           = KSPSetUp_BCGSL;
550:   ksp->ops->solve           = KSPSolve_BCGSL;
551:   ksp->ops->destroy         = KSPDefaultDestroy;
552:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
553:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
554:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
555:   ksp->ops->view            = KSPView_BCGSL;

557:   /* Let the user redefine the number of directions vectors */
558:   bcgsl->ell = 2;

560:   /*Choose between a single MR step or an averaged MR/OR */
561:   bcgsl->bConvex = PETSC_FALSE;

563:   /* Set the threshold for when exact residuals will be used */
564:   bcgsl->delta = 0.0;
565:   return(0);
566: }