ViennaCL - The Vienna Computing Library  1.5.2
gmres.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <limits>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
36 
37 namespace viennacl
38 {
39  namespace linalg
40  {
41 
44  class gmres_tag //generalized minimum residual
45  {
46  public:
53  gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
54  : tol_(tol), iterations_(max_iterations), krylov_dim_(krylov_dim), iters_taken_(0) {}
55 
57  double tolerance() const { return tol_; }
59  unsigned int max_iterations() const { return iterations_; }
61  unsigned int krylov_dim() const { return krylov_dim_; }
63  unsigned int max_restarts() const
64  {
65  unsigned int ret = iterations_ / krylov_dim_;
66  if (ret > 0 && (ret * krylov_dim_ == iterations_) )
67  return ret - 1;
68  return ret;
69  }
70 
72  unsigned int iters() const { return iters_taken_; }
74  void iters(unsigned int i) const { iters_taken_ = i; }
75 
77  double error() const { return last_error_; }
79  void error(double e) const { last_error_ = e; }
80 
81  private:
82  double tol_;
83  unsigned int iterations_;
84  unsigned int krylov_dim_;
85 
86  //return values from solver
87  mutable unsigned int iters_taken_;
88  mutable double last_error_;
89  };
90 
91  namespace detail
92  {
93 
94  template <typename SRC_VECTOR, typename DEST_VECTOR>
95  void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0)
96  {
97  for (vcl_size_t i=0; i<len; ++i)
98  dest[start+i] = src[start+i];
99  }
100 
101  template <typename ScalarType, typename DEST_VECTOR>
102  void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0)
103  {
104  typedef typename viennacl::vector<ScalarType>::difference_type difference_type;
105  viennacl::copy( src.begin() + static_cast<difference_type>(start),
106  src.begin() + static_cast<difference_type>(start + len),
107  dest.begin() + static_cast<difference_type>(start));
108  }
109 
118  template <typename VectorType, typename ScalarType>
119  void gmres_setup_householder_vector(VectorType const & input_vec, VectorType & hh_vec, ScalarType & beta, ScalarType & mu, vcl_size_t j)
120  {
121  ScalarType input_j = input_vec(j);
122 
123  // copy entries from input vector to householder vector:
124  detail::gmres_copy_helper(input_vec, hh_vec, viennacl::traits::size(hh_vec) - (j+1), j+1);
125 
126  ScalarType sigma = viennacl::linalg::norm_2(hh_vec);
127  sigma *= sigma;
128 
129  if (sigma == 0)
130  {
131  beta = 0;
132  mu = input_j;
133  }
134  else
135  {
136  mu = std::sqrt(sigma + input_j*input_j);
137 
138  ScalarType hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
139 
140  beta = ScalarType(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
141 
142  //divide hh_vec by its diagonal element hh_vec_0
143  hh_vec /= hh_vec_0;
144  hh_vec[j] = ScalarType(1);
145  }
146  }
147 
148  // Apply (I - beta h h^T) to x (Householder reflection with Householder vector h)
149  template <typename VectorType, typename ScalarType>
150  void gmres_householder_reflect(VectorType & x, VectorType const & h, ScalarType beta)
151  {
152  ScalarType hT_in_x = viennacl::linalg::inner_prod(h, x);
153  x -= (beta * hT_in_x) * h;
154  }
155 
156  }
157 
168  template <typename MatrixType, typename VectorType, typename PreconditionerType>
169  VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
170  {
171  typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
172  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
173  unsigned int problem_size = static_cast<unsigned int>(viennacl::traits::size(rhs));
174  VectorType result = rhs;
175  viennacl::traits::clear(result);
176 
177  unsigned int krylov_dim = tag.krylov_dim();
178  if (problem_size < tag.krylov_dim())
179  krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
180 
181  VectorType res = rhs;
182  VectorType v_k_tilde = rhs;
183  VectorType v_k_tilde_temp = rhs;
184 
185  std::vector< std::vector<CPU_ScalarType> > R(krylov_dim, std::vector<CPU_ScalarType>(tag.krylov_dim()));
186  std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
187 
188  std::vector<VectorType> householder_reflectors(krylov_dim, rhs);
189  std::vector<CPU_ScalarType> betas(krylov_dim);
190 
191  CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
192 
193  if (norm_rhs == 0) //solution is zero if RHS norm is zero
194  return result;
195 
196  tag.iters(0);
197 
198  for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
199  {
200  //
201  // (Re-)Initialize residual: r = b - A*x (without temporary for the result of A*x)
202  //
203  res = rhs;
204  res -= viennacl::linalg::prod(matrix, result); //initial guess zero
205  precond.apply(res);
206 
207  CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res);
208 
209  //
210  // Check for premature convergence
211  //
212  if (rho_0 / norm_rhs < tag.tolerance() ) // norm_rhs is known to be nonzero here
213  {
214  tag.error(rho_0 / norm_rhs);
215  return result;
216  }
217 
218  //
219  // Normalize residual and set 'rho' to 1 as requested in 'A Simpler GMRES' by Walker and Zhou.
220  //
221  res /= rho_0;
222  CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
223 
224 
225  //
226  // Iterate up until maximal Krylove space dimension is reached:
227  //
228  unsigned int k = 0;
229  for (k = 0; k < krylov_dim; ++k)
230  {
231  tag.iters( tag.iters() + 1 ); //increase iteration counter
232 
233  // prepare storage:
235  viennacl::traits::clear(householder_reflectors[k]);
236 
237  //compute v_k = A * v_{k-1} via Householder matrices
238  if (k == 0)
239  {
240  v_k_tilde = viennacl::linalg::prod(matrix, res);
241  precond.apply(v_k_tilde);
242  }
243  else
244  {
245  viennacl::traits::clear(v_k_tilde);
246  v_k_tilde[k-1] = CPU_ScalarType(1);
247 
248  //Householder rotations, part 1: Compute P_1 * P_2 * ... * P_{k-1} * e_{k-1}
249  for (int i = k-1; i > -1; --i)
250  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
251 
252  v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
253  precond.apply(v_k_tilde_temp);
254  v_k_tilde = v_k_tilde_temp;
255 
256  //Householder rotations, part 2: Compute P_{k-1} * ... * P_{1} * v_k_tilde
257  for (unsigned int i = 0; i < k; ++i)
258  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
259  }
260 
261  //
262  // Compute Householder reflection for v_k_tilde such that all entries below k-th entry are zero:
263  //
264  CPU_ScalarType rho_k_k = 0;
265  detail::gmres_setup_householder_vector(v_k_tilde, householder_reflectors[k], betas[k], rho_k_k, k);
266 
267  //
268  // copy first k entries from v_k_tilde to R[k] in order to fill k-th column with result of
269  // P_k * v_k_tilde = (v[0], ... , v[k-1], norm(v), 0, 0, ...) =: (rho_{1,k}, rho_{2,k}, ..., rho_{k,k}, 0, ..., 0);
270  //
271  detail::gmres_copy_helper(v_k_tilde, R[k], k);
272  R[k][k] = rho_k_k;
273 
274  //
275  // Update residual: r = P_k r
276  // Set zeta_k = r[k] including machine precision considerations: mathematically we have |r[k]| <= rho
277  // Set rho *= sin(acos(r[k] / rho))
278  //
279  detail::gmres_householder_reflect(res, householder_reflectors[k], betas[k]);
280 
281  if (res[k] > rho) //machine precision reached
282  res[k] = rho;
283  if (res[k] < -rho) //machine precision reached
284  res[k] = -rho;
285  projection_rhs[k] = res[k];
286 
287  rho *= std::sin( std::acos(projection_rhs[k] / rho) );
288 
289  if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance()) // Residual is sufficiently reduced, stop here
290  {
291  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
292  ++k;
293  break;
294  }
295  } // for k
296 
297  //
298  // Triangular solver stage:
299  //
300 
301  for (int i=k-1; i>-1; --i)
302  {
303  for (unsigned int j=i+1; j<k; ++j)
304  projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed
305 
306  projection_rhs[i] /= R[i][i];
307  }
308 
309  //
310  // Note: 'projection_rhs' now holds the solution (eta_1, ..., eta_k)
311  //
312 
313  res *= projection_rhs[0];
314 
315  if (k > 0)
316  {
317  for (unsigned int i = 0; i < k-1; ++i)
318  res[i] += projection_rhs[i+1];
319  }
320 
321  //
322  // Form z inplace in 'res' by applying P_1 * ... * P_{k}
323  //
324  for (int i=k-1; i>=0; --i)
325  detail::gmres_householder_reflect(res, householder_reflectors[i], betas[i]);
326 
327  res *= rho_0;
328  result += res; // x += rho_0 * z in the paper
329 
330  //
331  // Check for convergence:
332  //
333  tag.error(std::fabs(rho*rho_0 / norm_rhs));
334  if ( tag.error() < tag.tolerance() )
335  return result;
336  }
337 
338  return result;
339  }
340 
343  template <typename MatrixType, typename VectorType>
344  VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
345  {
346  return solve(matrix, rhs, tag, no_precond());
347  }
348 
349 
350  }
351 }
352 
353 #endif
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: gmres.hpp:77
base_type::difference_type difference_type
Definition: vector.hpp:985
std::size_t vcl_size_t
Definition: forwards.h:58
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: gmres.hpp:79
A dense matrix class.
Definition: forwards.h:293
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
unsigned int krylov_dim() const
Returns the maximum dimension of the Krylov space before restart.
Definition: gmres.hpp:61
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:44
double tolerance() const
Returns the relative tolerance.
Definition: gmres.hpp:57
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
A tag class representing the use of no preconditioner.
Definition: forwards.h:727
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:803
T::value_type type
Definition: result_of.hpp:230
void gmres_householder_reflect(VectorType &x, VectorType const &h, ScalarType beta)
Definition: gmres.hpp:150
Generic clear functionality for different vector and matrix types.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
void gmres_copy_helper(SRC_VECTOR const &src, DEST_VECTOR &dest, vcl_size_t len, vcl_size_t start=0)
Definition: gmres.hpp:95
unsigned int max_restarts() const
Returns the maximum number of GMRES restarts.
Definition: gmres.hpp:63
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: gmres.hpp:59
VectorType solve(const MatrixType &matrix, VectorType const &rhs, bicgstab_tag const &tag)
Implementation of the stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:92
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor.
Definition: gmres.hpp:53
void iters(unsigned int i) const
Set the number of solver iterations (should only be modified by the solver)
Definition: gmres.hpp:74
unsigned int iters() const
Return the number of solver iterations:
Definition: gmres.hpp:72
A collection of compile time type deductions.
void gmres_setup_householder_vector(VectorType const &input_vec, VectorType &hh_vec, ScalarType &beta, ScalarType &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
Definition: gmres.hpp:119