Actual source code: bcgsl.c
1: #define PETSCKSP_DLL
2: /*
3: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
4: * "Enhanced implementation of BiCGStab(L) for solving linear systems
5: * of equations". This uses tricky delayed updating ideas to prevent
6: * round-off buildup.
7: */
8: #include petscblaslapack.h
9: #include include/private/kspimpl.h
10: #include bcgsl.h
15: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
16: {
17: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *) ksp->data;
18: PetscScalar alpha, beta, nu, omega, sigma;
19: PetscScalar rho0, rho1;
20: PetscReal kappa0, kappaA, kappa1;
21: PetscReal ghat, epsilon, abstol;
22: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
23: PetscTruth bUpdateX;
24: PetscTruth bBombed = PETSC_FALSE;
26: PetscInt maxit;
27: PetscInt h, i, j, k, vi, ell;
28: PetscBLASInt ldMZ,bierr;
33: /* set up temporary vectors */
34: vi = 0;
35: ell = bcgsl->ell;
36: bcgsl->vB = ksp->work[vi]; vi++;
37: bcgsl->vRt = ksp->work[vi]; vi++;
38: bcgsl->vTm = ksp->work[vi]; vi++;
39: bcgsl->vvR = ksp->work+vi; vi += ell+1;
40: bcgsl->vvU = ksp->work+vi; vi += ell+1;
41: bcgsl->vXr = ksp->work[vi]; vi++;
42: ldMZ = ell+1;
43: {
44: PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
45: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
46: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
47: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
48: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
49: }
51: /* Prime the iterative solver */
52: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
53: VecNorm(VVR[0], NORM_2, &zeta0);
54: rnmax_computed = zeta0;
55: rnmax_true = zeta0;
57: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
58: if (ksp->reason) {
59: PetscObjectTakeAccess(ksp);
60: ksp->its = 0;
61: ksp->rnorm = zeta0;
62: PetscObjectGrantAccess(ksp);
63: PetscFree(AY0c);
64: PetscFree(AYlc);
65: PetscFree(AYtc);
66: PetscFree(MZa);
67: PetscFree(MZb);
69: return(0);
70: }
72: VecSet(VVU[0],0.0);
73: alpha = 0;
74: rho0 = omega = 1;
76: if (bcgsl->delta>0.0) {
77: VecCopy(VX, VXR);
78: VecSet(VX,0.0);
79: VecCopy(VVR[0], VB);
80: } else {
81: VecCopy(ksp->vec_rhs, VB);
82: }
84: /* Life goes on */
85: VecCopy(VVR[0], VRT);
86: zeta = zeta0;
88: KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
90: for (k=0; k<maxit; k += bcgsl->ell) {
91: PetscObjectTakeAccess(ksp);
92: ksp->its = k;
93: ksp->rnorm = zeta;
94: PetscObjectGrantAccess(ksp);
96: KSPLogResidualHistory(ksp, zeta);
97: KSPMonitor(ksp, ksp->its, zeta);
99: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
100: if (ksp->reason) break;
102: /* BiCG part */
103: rho0 = -omega*rho0;
104: nrm0 = zeta;
105: for (j=0; j<bcgsl->ell; j++) {
106: /* rho1 <- r_j' * r_tilde */
107: VecDot(VVR[j], VRT, &rho1);
108: if (rho1 == 0.0) {
109: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
110: bBombed = PETSC_TRUE;
111: break;
112: }
113: beta = alpha*(rho1/rho0);
114: rho0 = rho1;
115: nu = -beta;
116: for (i=0; i<=j; i++) {
117: /* u_i <- r_i - beta*u_i */
118: VecAYPX(VVU[i], nu, VVR[i]);
119: }
120: /* u_{j+1} <- inv(K)*A*u_j */
121: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
123: VecDot(VVU[j+1], VRT, &sigma);
124: if (sigma == 0.0) {
125: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
126: bBombed = PETSC_TRUE;
127: break;
128: }
129: alpha = rho1/sigma;
131: /* x <- x + alpha*u_0 */
132: VecAXPY(VX, alpha, VVU[0]);
134: nu = -alpha;
135: for (i=0; i<=j; i++) {
136: /* r_i <- r_i - alpha*u_{i+1} */
137: VecAXPY(VVR[i], nu, VVU[i+1]);
138: }
140: /* r_{j+1} <- inv(K)*A*r_j */
141: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
143: VecNorm(VVR[0], NORM_2, &nrm0);
144: if (bcgsl->delta>0.0) {
145: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
146: if (rnmax_true<nrm0) rnmax_true = nrm0;
147: }
149: /* NEW: check for early exit */
150: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
151: if (ksp->reason) {
152: PetscObjectTakeAccess(ksp);
153: ksp->its = k+j;
154: ksp->rnorm = nrm0;
155: PetscObjectGrantAccess(ksp);
156: break;
157: }
158: }
160: if (bBombed==PETSC_TRUE) break;
162: /* Polynomial part */
164: for (i=0; i<=bcgsl->ell; i++) {
165: for (j=0; j<i; j++) {
166: VecDot(VVR[j], VVR[i], &nu);
167: MZa[i+ldMZ*j] = nu;
168: MZa[j+ldMZ*i] = nu;
169: MZb[i+ldMZ*j] = nu;
170: MZb[j+ldMZ*i] = nu;
171: }
173: VecDot(VVR[i], VVR[i], &nu);
174: MZa[i+ldMZ*i] = nu;
175: MZb[i+ldMZ*i] = nu;
176: }
178: if (!bcgsl->bConvex || bcgsl->ell==1) {
179: PetscBLASInt ione = 1,bell = bcgsl->ell;
181: AY0c[0] = -1;
182: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
183: if (ierr!=0) {
184: ksp->reason = KSP_DIVERGED_BREAKDOWN;
185: bBombed = PETSC_TRUE;
186: break;
187: }
188: BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
189: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
190: } else {
191: PetscBLASInt neqs = bcgsl->ell-1;
192: PetscBLASInt ione = 1;
193: PetscScalar aone = 1.0, azero = 0.0;
195: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
196: if (ierr!=0) {
197: ksp->reason = KSP_DIVERGED_BREAKDOWN;
198: bBombed = PETSC_TRUE;
199: break;
200: }
201: BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
202: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
203: AY0c[0] = -1;
204: AY0c[bcgsl->ell] = 0;
206: BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
207: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
209: AYlc[0] = 0;
210: AYlc[bcgsl->ell] = -1;
212: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
214: kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
216: /* round-off can cause negative kappa's */
217: if (kappa0<0) kappa0 = -kappa0;
218: kappa0 = sqrt(kappa0);
220: kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
222: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
224: kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
226: if (kappa1<0) kappa1 = -kappa1;
227: kappa1 = sqrt(kappa1);
229: if (kappa0!=0.0 && kappa1!=0.0) {
230: if (kappaA<0.7*kappa0*kappa1) {
231: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
232: } else {
233: ghat = kappaA/(kappa1*kappa1);
234: }
235: for (i=0; i<=bcgsl->ell; i++) {
236: AY0c[i] = AY0c[i] - ghat* AYlc[i];
237: }
238: }
239: }
241: omega = AY0c[bcgsl->ell];
242: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
243: omega = AY0c[h];
244: }
245: if (omega==0.0) {
246: ksp->reason = KSP_DIVERGED_BREAKDOWN;
247: break;
248: }
250: for (i=1; i<=bcgsl->ell; i++) {
251: nu = -AY0c[i];
252: VecAXPY(VVU[0], nu, VVU[i]);
253: nu = AY0c[i];
254: VecAXPY(VX, nu, VVR[i-1]);
255: nu = -AY0c[i];
256: VecAXPY(VVR[0], nu, VVR[i]);
257: }
259: VecNorm(VVR[0], NORM_2, &zeta);
261: /* Accurate Update */
262: if (bcgsl->delta>0.0) {
263: if (rnmax_computed<zeta) rnmax_computed = zeta;
264: if (rnmax_true<zeta) rnmax_true = zeta;
266: bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
267: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
268: /* r0 <- b-inv(K)*A*X */
269: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
270: nu = -1;
271: VecAYPX(VVR[0], nu, VB);
272: rnmax_true = zeta;
274: if (bUpdateX) {
275: nu = 1;
276: VecAXPY(VXR,nu,VX);
277: VecSet(VX,0.0);
278: VecCopy(VVR[0], VB);
279: rnmax_computed = zeta;
280: }
281: }
282: }
283: }
285: KSPMonitor(ksp, ksp->its, zeta);
287: if (bcgsl->delta>0.0) {
288: nu = 1;
289: VecAXPY(VX,nu,VXR);
290: }
292: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
293: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
295: PetscFree(AY0c);
296: PetscFree(AYlc);
297: PetscFree(AYtc);
298: PetscFree(MZa);
299: PetscFree(MZb);
300: return(0);
301: }
305: /*@C
306: KSPBCGSLSetXRes - Sets the parameter governing when
307: exact residuals will be used instead of computed residuals.
309: Collective on KSP
311: Input Parameters:
312: + ksp - iterative context obtained from KSPCreate
313: - delta - computed residuals are used alone when delta is not positive
315: Options Database Keys:
317: . -ksp_bcgsl_xres delta
319: Level: intermediate
321: .keywords: KSP, BiCGStab(L), set, exact residuals
323: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
324: @*/
325: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
326: {
327: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
331: if (ksp->setupcalled) {
332: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
333: KSPDefaultFreeWork(ksp);
334: ksp->setupcalled = 0;
335: }
336: }
337: bcgsl->delta = delta;
338: return(0);
339: }
343: /*@C
344: KSPBCGSLSetPol - Sets the type of polynomial part will
345: be used in the BiCGSTab(L) solver.
347: Collective on KSP
349: Input Parameters:
350: + ksp - iterative context obtained from KSPCreate
351: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
353: Options Database Keys:
355: + -ksp_bcgsl_cxpoly - use enhanced polynomial
356: . -ksp_bcgsl_mrpoly - use standard polynomial
358: Level: intermediate
360: .keywords: KSP, BiCGStab(L), set, polynomial
362: .seealso: @()
363: @*/
364: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
365: {
366: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
370: if (!ksp->setupcalled) {
371: bcgsl->bConvex = uMROR;
372: } else if (bcgsl->bConvex != uMROR) {
373: /* free the data structures,
374: then create them again
375: */
376: KSPDefaultFreeWork(ksp);
377: bcgsl->bConvex = uMROR;
378: ksp->setupcalled = 0;
379: }
380: return(0);
381: }
385: /*@C
386: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
388: Collective on KSP
390: Input Parameters:
391: + ksp - iterative context obtained from KSPCreate
392: - ell - number of search directions
394: Options Database Keys:
396: . -ksp_bcgsl_ell ell
398: Level: intermediate
400: .keywords: KSP, BiCGStab(L), set, exact residuals,
402: .seealso: @()
403: @*/
404: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
405: {
406: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
410: if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
412: if (!ksp->setupcalled) {
413: bcgsl->ell = ell;
414: } else if (bcgsl->ell != ell) {
415: /* free the data structures, then create them again */
416: KSPDefaultFreeWork(ksp);
418: bcgsl->ell = ell;
419: ksp->setupcalled = 0;
420: }
421: return(0);
422: }
426: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
427: {
428: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
429: PetscErrorCode ierr;
430: PetscTruth isascii, isstring;
433: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
434: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);
436: if (isascii) {
437: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
438: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
439: } else {
440: SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
441: }
442: return(0);
443: }
447: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
448: {
449: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
451: PetscInt this_ell;
452: PetscReal delta;
453: PetscTruth flga, flg;
456: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
457: don't need to be called here.
458: */
459: PetscOptionsHead("KSP BiCGStab(L) Options");
461: /* Set number of search directions */
462: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
463: if (flg) {
464: KSPBCGSLSetEll(ksp, this_ell);
465: }
467: /* Set polynomial type */
468: PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
469: if (flga) {
470: KSPBCGSLSetPol(ksp, PETSC_TRUE);
471: } else {
472: PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
473: KSPBCGSLSetPol(ksp, PETSC_FALSE);
474: }
476: /* Will computed residual be refreshed? */
477: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
478: if (flg) {
479: KSPBCGSLSetXRes(ksp, delta);
480: }
481: PetscOptionsTail();
482: return(0);
483: }
487: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
488: {
489: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
490: PetscInt ell = bcgsl->ell;
494: /* Support left preconditioners only */
495: if (ksp->pc_side == PC_SYMMETRIC) {
496: SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
497: } else if (ksp->pc_side == PC_RIGHT) {
498: SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
499: }
500: KSPDefaultGetWork(ksp, 6+2*ell);
501: return(0);
502: }
504: /*MC
505: KSPBCGSL - Implements a slight variant of the Enhanced
506: BiCGStab(L) algorithm in (3) and (2). The variation
507: concerns cases when either kappa0**2 or kappa1**2 is
508: negative due to round-off. Kappa0 has also been pulled
509: out of the denominator in the formula for ghat.
511: References:
512: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
513: approaches for the stable computation of hybrid BiCG
514: methods", Applied Numerical Mathematics: Transactions
515: f IMACS, 19(3), pp 235-54, 1996.
516: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
517: "BiCGStab(L) and other hybrid Bi-CG methods",
518: Numerical Algorithms, 7, pp 75-109, 1994.
519: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
520: for solving linear systems of equations", preprint
521: from www.citeseer.com.
523: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
525: Options Database Keys:
526: + -ksp_bcgsl_ell <ell> Number of Krylov search directions
527: - -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
528: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals
530: Level: beginner
532: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS
534: M*/
538: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
539: {
541: KSP_BiCGStabL *bcgsl;
544: /* allocate BiCGStab(L) context */
545: PetscNew(KSP_BiCGStabL, &bcgsl);
546: ksp->data = (void*)bcgsl;
548: ksp->pc_side = PC_LEFT;
549: ksp->ops->setup = KSPSetUp_BCGSL;
550: ksp->ops->solve = KSPSolve_BCGSL;
551: ksp->ops->destroy = KSPDefaultDestroy;
552: ksp->ops->buildsolution = KSPDefaultBuildSolution;
553: ksp->ops->buildresidual = KSPDefaultBuildResidual;
554: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
555: ksp->ops->view = KSPView_BCGSL;
557: /* Let the user redefine the number of directions vectors */
558: bcgsl->ell = 2;
560: /*Choose between a single MR step or an averaged MR/OR */
561: bcgsl->bConvex = PETSC_FALSE;
563: /* Set the threshold for when exact residuals will be used */
564: bcgsl->delta = 0.0;
565: return(0);
566: }