ViennaCL - The Vienna Computing Library  1.5.2
mixed_precision_cg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
2 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
30 #include "viennacl/linalg/ilu.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
36 #include "viennacl/ocl/backend.hpp"
37 #include "viennacl/ocl/kernel.hpp"
39 
41 
42 namespace viennacl
43 {
44  namespace linalg
45  {
46 
50  {
51  public:
58  mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
59 
61  double tolerance() const { return tol_; }
63  float inner_tolerance() const { return inner_tol_; }
65  unsigned int max_iterations() const { return iterations_; }
66 
68  unsigned int iters() const { return iters_taken_; }
69  void iters(unsigned int i) const { iters_taken_ = i; }
70 
72  double error() const { return last_error_; }
74  void error(double e) const { last_error_ = e; }
75 
76 
77  private:
78  double tol_;
79  unsigned int iterations_;
80  float inner_tol_;
81 
82  //return values from solver
83  mutable unsigned int iters_taken_;
84  mutable double last_error_;
85  };
86 
87 
89  "#if defined(cl_khr_fp64)\n"
90  "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
91  "#elif defined(cl_amd_fp64)\n"
92  "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
93  "#endif\n"
94  "__kernel void assign_double_to_float(\n"
95  " __global float * vec1,\n"
96  " __global const double * vec2, \n"
97  " unsigned int size) \n"
98  "{ \n"
99  " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
100  " vec1[i] = (float)(vec2[i]);\n"
101  "};\n\n"
102  "__kernel void inplace_add_float_to_double(\n"
103  " __global double * vec1,\n"
104  " __global const float * vec2, \n"
105  " unsigned int size) \n"
106  "{ \n"
107  " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
108  " vec1[i] += (double)(vec2[i]);\n"
109  "};\n";
110 
111 
121  template <typename MatrixType, typename VectorType>
122  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
123  {
124  //typedef typename VectorType::value_type ScalarType;
125  typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
126  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
127 
128  //TODO: Assert CPU_ScalarType == double
129 
130  //std::cout << "Starting CG" << std::endl;
131  vcl_size_t problem_size = viennacl::traits::size(rhs);
132  VectorType result(rhs);
133  viennacl::traits::clear(result);
134 
135  VectorType residual = rhs;
136 
137  CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
138  CPU_ScalarType new_ip_rr = 0;
139  CPU_ScalarType norm_rhs_squared = ip_rr;
140 
141  if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero
142  return result;
143 
144  static bool first = true;
145 
146  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
147  if (first)
148  {
149  ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
150  }
151 
152  viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
153  viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
154  viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
155  viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
156  float inner_ip_rr = static_cast<float>(ip_rr);
157  float new_inner_ip_rr = 0;
158  float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
159  float alpha;
160  float beta;
161 
162  viennacl::ocl::kernel & assign_double_to_float = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
163  viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");
164 
165  // transfer rhs to single precision:
166  viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
167  rhs.handle().opencl_handle(),
168  cl_uint(rhs.size())
169  ) );
170  //std::cout << "copying p_low_precision..." << std::endl;
171  //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size());
172  residual_low_precision = p_low_precision;
173 
174  // transfer matrix to single precision:
175  viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
176  viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
177  viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );
178 
179  viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
180  matrix.handle().opencl_handle(),
181  cl_uint(matrix.nnz())
182  ) );
183  //std::cout << "copying matrix_low_precision..." << std::endl;
184  //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz());
185 
186  //std::cout << "Starting CG solver iterations... " << std::endl;
187 
188 
189  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
190  {
191  tag.iters(i+1);
192 
193  // lower precision 'inner iteration'
194  tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
195 
196  alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
197  result_low_precision += alpha * p_low_precision;
198  residual_low_precision -= alpha * tmp_low_precision;
199 
200  new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
201 
202  beta = new_inner_ip_rr / inner_ip_rr;
203  inner_ip_rr = new_inner_ip_rr;
204 
205  p_low_precision = residual_low_precision + beta * p_low_precision;
206 
207 
208 
209  if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
210  {
211  //std::cout << "outer correction at i=" << i << std::endl;
212  //result += result_low_precision;
213  viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
214  result_low_precision.handle().opencl_handle(),
215  cl_uint(result.size())
216  ) );
217 
218  // residual = b - Ax (without introducing a temporary)
219  residual = viennacl::linalg::prod(matrix, result);
220  residual = rhs - residual;
221 
222  new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
223  if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here
224  break;
225 
226  // p_low_precision = residual;
227  viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
228  residual.handle().opencl_handle(),
229  cl_uint(residual.size())
230  ) );
231  result_low_precision.clear();
232  residual_low_precision = p_low_precision;
233  initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
234  inner_ip_rr = static_cast<float>(new_ip_rr);
235  }
236  }
237 
238  //store last error estimate:
239  tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
240 
241  return result;
242  }
243 
244  template <typename MatrixType, typename VectorType>
245  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
246  {
247  return solve(matrix, rhs, tag);
248  }
249 
250 
251  }
252 }
253 
254 #endif
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
A dense matrix class.
Definition: forwards.h:293
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:340
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
const char * double_float_conversion_program
Definition: mixed_precision_cg.hpp:88
A tag class representing the use of no preconditioner.
Definition: forwards.h:727
Implementations of incomplete factorization preconditioners. Convenience header file.
T::value_type type
Definition: result_of.hpp:230
Generic clear functionality for different vector and matrix types.
unsigned int iters() const
Return the number of solver iterations:
Definition: mixed_precision_cg.hpp:68
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: mixed_precision_cg.hpp:65
Implementations of the OpenCL backend, where all contexts are stored in.
Proxy classes for vectors.
mixed_precision_cg_tag(double tol=1e-8, unsigned int max_iterations=300, float inner_tol=1e-2f)
The constructor.
Definition: mixed_precision_cg.hpp:58
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
double tolerance() const
Returns the relative tolerance.
Definition: mixed_precision_cg.hpp:61
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Definition: memory.hpp:140
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: mixed_precision_cg.hpp:72
float inner_tolerance() const
Returns the relative tolerance.
Definition: mixed_precision_cg.hpp:63
const handle_type & handle() const
Returns the memory handle.
Definition: vector.hpp:856
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:62
void iters(unsigned int i) const
Definition: mixed_precision_cg.hpp:69
VectorType solve(const MatrixType &matrix, VectorType const &rhs, bicgstab_tag const &tag)
Implementation of the stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:92
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: mixed_precision_cg.hpp:49
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: mixed_precision_cg.hpp:74
A collection of compile time type deductions.
void clear()
Resets all entries to zero. Does not change the size of the vector.
Definition: vector.hpp:863
Main interface routines for memory management.