ViennaCL - The Vienna Computing Library  1.5.2
qr-method.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_QR_METHOD_HPP_
2 #define VIENNACL_LINALG_QR_METHOD_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include "viennacl/vector.hpp"
22 #include "viennacl/matrix.hpp"
23 
25 #include "viennacl/linalg/prod.hpp"
26 
27 #include <boost/numeric/ublas/vector.hpp>
28 #include <boost/numeric/ublas/matrix.hpp>
29 
34 namespace viennacl
35 {
36  namespace linalg
37  {
38  namespace detail
39  {
40  template<typename MatrixType, typename VectorType>
41  void givens_next(MatrixType& matrix,
42  VectorType& tmp1,
43  VectorType& tmp2,
44  int l,
45  int m
46  )
47  {
48  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
49 
50  typedef typename MatrixType::value_type ScalarType;
51  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
52 
54 
55  kernel.global_work_size(0, viennacl::tools::align_to_multiple<cl_uint>(cl_uint(viennacl::traits::size1(matrix)), 256));
56  kernel.local_work_size(0, 256);
57 
59  matrix,
60  tmp1,
61  tmp2,
62  static_cast<cl_uint>(matrix.size1()),
63  static_cast<cl_uint>(matrix.internal_size2()),
64  static_cast<cl_uint>(l),
65  static_cast<cl_uint>(m - 1)
66  ));
67  }
68 
69 
70  // Symmetric tridiagonal QL algorithm.
71  // This is derived from the Algol procedures tql2, by Bowdler, Martin, Reinsch, and Wilkinson,
72  // Handbook for Auto. Comp., Vol.ii-Linear Algebra, and the corresponding Fortran subroutine in EISPACK.
73  template <typename SCALARTYPE, unsigned int ALIGNMENT>
75  boost::numeric::ublas::vector<SCALARTYPE> & d,
76  boost::numeric::ublas::vector<SCALARTYPE> & e)
77  {
78  int n = static_cast<int>(Q.size1());
79 
80  boost::numeric::ublas::vector<SCALARTYPE> cs(n), ss(n);
81  viennacl::vector<SCALARTYPE> tmp1(n), tmp2(n);
82 
83  for (int i = 1; i < n; i++)
84  e(i - 1) = e(i);
85 
86  e(n - 1) = 0;
87 
88  SCALARTYPE f = 0;
89  SCALARTYPE tst1 = 0;
90  SCALARTYPE eps = 2 * static_cast<SCALARTYPE>(EPS);
91 
92  for (int l = 0; l < n; l++)
93  {
94  // Find small subdiagonal element.
95  tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d(l)) + std::fabs(e(l)));
96  int m = l;
97  while (m < n)
98  {
99  if (std::fabs(e(m)) <= eps * tst1)
100  break;
101  m++;
102  }
103 
104  // If m == l, d(l) is an eigenvalue, otherwise, iterate.
105  if (m > l)
106  {
107  int iter = 0;
108  do
109  {
110  iter = iter + 1; // (Could check iteration count here.)
111 
112  // Compute implicit shift
113  SCALARTYPE g = d(l);
114  SCALARTYPE p = (d(l + 1) - g) / (2 * e(l));
115  SCALARTYPE r = pythag<SCALARTYPE>(p, 1);
116  if (p < 0)
117  {
118  r = -r;
119  }
120 
121  d(l) = e(l) / (p + r);
122  d(l + 1) = e(l) * (p + r);
123  SCALARTYPE dl1 = d(l + 1);
124  SCALARTYPE h = g - d(l);
125  for (int i = l + 2; i < n; i++)
126  {
127  d(i) -= h;
128  }
129 
130  f = f + h;
131 
132  // Implicit QL transformation.
133  p = d(m);
134  SCALARTYPE c = 1;
135  SCALARTYPE c2 = c;
136  SCALARTYPE c3 = c;
137  SCALARTYPE el1 = e(l + 1);
138  SCALARTYPE s = 0;
139  SCALARTYPE s2 = 0;
140  for (int i = m - 1; i >= l; i--)
141  {
142  c3 = c2;
143  c2 = c;
144  s2 = s;
145  g = c * e(i);
146  h = c * p;
147  r = pythag(p, e(i));
148  e(i + 1) = s * r;
149  s = e(i) / r;
150  c = p / r;
151  p = c * d(i) - s * g;
152  d(i + 1) = h + s * (c * g + s * d(i));
153 
154  cs[i] = c;
155  ss[i] = s;
156  }
157 
158  p = -s * s2 * c3 * el1 * e(l) / dl1;
159  e(l) = s * p;
160  d(l) = c * p;
161 
162  {
163  viennacl::copy(cs, tmp1);
164  viennacl::copy(ss, tmp2);
165 
166  givens_next(Q, tmp1, tmp2, l, m);
167  }
168 
169  // Check for convergence.
170  }
171  while (std::fabs(e(l)) > eps * tst1);
172  }
173  d(l) = d(l) + f;
174  e(l) = 0;
175  }
176  }
177 
178  template <typename SCALARTYPE, typename MatrixT>
179  void final_iter_update_gpu(MatrixT& A,
180  int n,
181  int last_n,
182  SCALARTYPE q,
183  SCALARTYPE p
184  )
185  {
186  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
187 
189 
190  viennacl::ocl::enqueue(kernel(
191  A,
192  static_cast<cl_uint>(A.internal_size1()),
193  static_cast<cl_uint>(n),
194  static_cast<cl_uint>(last_n),
195  q,
196  p
197  ));
198  }
199 
200  template <typename SCALARTYPE, typename MatrixT>
201  void update_float_QR_column_gpu(MatrixT& A,
202  const std::vector<SCALARTYPE>& buf,
204  int m,
205  int n,
206  int last_n,
207  bool //is_triangular
208  )
209  {
210  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
211 
212  viennacl::fast_copy(buf, buf_vcl);
213 
215 
216  viennacl::ocl::enqueue(kernel(
217  A,
218  static_cast<cl_uint>(A.internal_size1()),
219  buf_vcl,
220  static_cast<cl_uint>(m),
221  static_cast<cl_uint>(n),
222  static_cast<cl_uint>(last_n)
223  ));
224  }
225 
226  template <typename SCALARTYPE, typename MatrixT>
227  void final_iter_update(MatrixT& A,
228  int n,
229  int last_n,
230  SCALARTYPE q,
231  SCALARTYPE p
232  )
233  {
234  for (int i = 0; i < last_n; i++)
235  {
236  SCALARTYPE v_in = A(i, n);
237  SCALARTYPE z = A(i, n - 1);
238  A(i, n - 1) = q * z + p * v_in;
239  A(i, n) = q * v_in - p * z;
240  }
241  }
242 
243  template <typename SCALARTYPE, typename MatrixT>
244  void update_float_QR_column(MatrixT& A,
245  const std::vector<SCALARTYPE>& buf,
246  int m,
247  int n,
248  int last_i,
249  bool is_triangular
250  )
251  {
252  for (int i = 0; i < last_i; i++)
253  {
254  int start_k = is_triangular?std::max(i + 1, m):m;
255 
256  SCALARTYPE* a_row = A.row(i);
257 
258  SCALARTYPE a_ik = a_row[start_k];
259  SCALARTYPE a_ik_1 = 0;
260  SCALARTYPE a_ik_2 = 0;
261 
262  if(start_k < n)
263  a_ik_1 = a_row[start_k + 1];
264 
265  for(int k = start_k; k < n; k++)
266  {
267  bool notlast = (k != n - 1);
268 
269  SCALARTYPE p = buf[5 * k] * a_ik + buf[5 * k + 1] * a_ik_1;
270 
271  if (notlast)
272  {
273  a_ik_2 = a_row[k + 2];
274  p = p + buf[5 * k + 2] * a_ik_2;
275  a_ik_2 = a_ik_2 - p * buf[5 * k + 4];
276  }
277 
278  a_row[k] = a_ik - p;
279  a_ik_1 = a_ik_1 - p * buf[5 * k + 3];
280 
281  a_ik = a_ik_1;
282  a_ik_1 = a_ik_2;
283  }
284 
285  if(start_k < n)
286  a_row[n] = a_ik;
287  }
288  }
289 
291  template <typename SCALARTYPE>
293  {
294  public:
296  {
297  size_ = 0;
298  }
299 
300  FastMatrix(vcl_size_t sz, vcl_size_t internal_size) : size_(sz), internal_size_(internal_size)
301  {
302  data.resize(internal_size * internal_size);
303  }
304 
305  SCALARTYPE& operator()(int i, int j)
306  {
307  return data[i * internal_size_ + j];
308  }
309 
310  SCALARTYPE* row(int i)
311  {
312  return &data[i * internal_size_];
313  }
314 
315  SCALARTYPE* begin()
316  {
317  return &data[0];
318  }
319 
320  SCALARTYPE* end()
321  {
322  return &data[0] + data.size();
323  }
324 
325  std::vector<SCALARTYPE> data;
326  private:
327  vcl_size_t size_;
328  vcl_size_t internal_size_;
329  };
330 
331  // Nonsymmetric reduction from Hessenberg to real Schur form.
332  // This is derived from the Algol procedure hqr2, by Martin and Wilkinson, Handbook for Auto. Comp.,
333  // Vol.ii-Linear Algebra, and the corresponding Fortran subroutine in EISPACK.
334  template <typename SCALARTYPE, unsigned int ALIGNMENT>
337  boost::numeric::ublas::vector<SCALARTYPE>& d,
338  boost::numeric::ublas::vector<SCALARTYPE>& e)
339  {
340  transpose(V);
341 
342  int nn = static_cast<int>(vcl_H.size1());
343 
344  FastMatrix<SCALARTYPE> H(nn, vcl_H.internal_size2());//, V(nn);
345 
346  std::vector<SCALARTYPE> buf(5 * nn);
347  viennacl::vector<SCALARTYPE> buf_vcl(5 * nn);
348 
349  viennacl::fast_copy(vcl_H, H.begin());
350 
351 
352  int n = nn - 1;
353 
354  SCALARTYPE eps = 2 * static_cast<SCALARTYPE>(EPS);
355  SCALARTYPE exshift = 0;
356  SCALARTYPE p = 0;
357  SCALARTYPE q = 0;
358  SCALARTYPE r = 0;
359  SCALARTYPE s = 0;
360  SCALARTYPE z = 0;
361  SCALARTYPE t;
362  SCALARTYPE w;
363  SCALARTYPE x;
364  SCALARTYPE y;
365 
366  SCALARTYPE out1, out2;
367 
368  // compute matrix norm
369  SCALARTYPE norm = 0;
370  for (int i = 0; i < nn; i++)
371  {
372  for (int j = std::max(i - 1, 0); j < nn; j++)
373  norm = norm + std::fabs(H(i, j));
374  }
375 
376  // Outer loop over eigenvalue index
377  int iter = 0;
378  while (n >= 0)
379  {
380  // Look for single small sub-diagonal element
381  int l = n;
382  while (l > 0)
383  {
384  s = std::fabs(H(l - 1, l - 1)) + std::fabs(H(l, l));
385  if (s == 0) s = norm;
386  if (std::fabs(H(l, l - 1)) < eps * s)
387  break;
388 
389  l--;
390  }
391 
392  // Check for convergence
393  if (l == n)
394  {
395  // One root found
396  H(n, n) = H(n, n) + exshift;
397  d(n) = H(n, n);
398  e(n) = 0;
399  n--;
400  iter = 0;
401  }
402  else if (l == n - 1)
403  {
404  // Two roots found
405  w = H(n, n - 1) * H(n - 1, n);
406  p = (H(n - 1, n - 1) - H(n, n)) / 2;
407  q = p * p + w;
408  z = static_cast<SCALARTYPE>(std::sqrt(std::fabs(q)));
409  H(n, n) = H(n, n) + exshift;
410  H(n - 1, n - 1) = H(n - 1, n - 1) + exshift;
411  x = H(n, n);
412 
413  if (q >= 0)
414  {
415  // Real pair
416  z = (p >= 0) ? (p + z) : (p - z);
417  d(n - 1) = x + z;
418  d(n) = d(n - 1);
419  if (z != 0)
420  d(n) = x - w / z;
421  e(n - 1) = 0;
422  e(n) = 0;
423  x = H(n, n - 1);
424  s = std::fabs(x) + std::fabs(z);
425  p = x / s;
426  q = z / s;
427  r = static_cast<SCALARTYPE>(std::sqrt(p * p + q * q));
428  p = p / r;
429  q = q / r;
430 
431  // Row modification
432  for (int j = n - 1; j < nn; j++)
433  {
434  SCALARTYPE h_nj = H(n, j);
435  z = H(n - 1, j);
436  H(n - 1, j) = q * z + p * h_nj;
437  H(n, j) = q * h_nj - p * z;
438  }
439 
440  final_iter_update(H, n, n + 1, q, p);
441  final_iter_update_gpu(V, n, nn, q, p);
442  }
443  else
444  {
445  // Complex pair
446  d(n - 1) = x + p;
447  d(n) = x + p;
448  e(n - 1) = z;
449  e(n) = -z;
450  }
451 
452  n = n - 2;
453  iter = 0;
454  }
455  else
456  {
457  // No convergence yet
458 
459  // Form shift
460  x = H(n, n);
461  y = 0;
462  w = 0;
463  if (l < n)
464  {
465  y = H(n - 1, n - 1);
466  w = H(n, n - 1) * H(n - 1, n);
467  }
468 
469  // Wilkinson's original ad hoc shift
470  if (iter == 10)
471  {
472  exshift += x;
473  for (int i = 0; i <= n; i++)
474  H(i, i) -= x;
475 
476  s = std::fabs(H(n, n - 1)) + std::fabs(H(n - 1, n - 2));
477  x = y = SCALARTYPE(0.75) * s;
478  w = SCALARTYPE(-0.4375) * s * s;
479  }
480 
481  // MATLAB's new ad hoc shift
482  if (iter == 30)
483  {
484  s = (y - x) / 2;
485  s = s * s + w;
486  if (s > 0)
487  {
488  s = static_cast<SCALARTYPE>(std::sqrt(s));
489  if (y < x) s = -s;
490  s = x - w / ((y - x) / 2 + s);
491  for (int i = 0; i <= n; i++)
492  H(i, i) -= s;
493  exshift += s;
494  x = y = w = SCALARTYPE(0.964);
495  }
496  }
497 
498  iter = iter + 1;
499 
500  // Look for two consecutive small sub-diagonal elements
501  int m = n - 2;
502  while (m >= l)
503  {
504  SCALARTYPE h_m1_m1 = H(m + 1, m + 1);
505  z = H(m, m);
506  r = x - z;
507  s = y - z;
508  p = (r * s - w) / H(m + 1, m) + H(m, m + 1);
509  q = h_m1_m1 - z - r - s;
510  r = H(m + 2, m + 1);
511  s = std::fabs(p) + std::fabs(q) + std::fabs(r);
512  p = p / s;
513  q = q / s;
514  r = r / s;
515  if (m == l)
516  break;
517  if (std::fabs(H(m, m - 1)) * (std::fabs(q) + std::fabs(r)) < eps * (std::fabs(p) * (std::fabs(H(m - 1, m - 1)) + std::fabs(z) + std::fabs(h_m1_m1))))
518  break;
519  m--;
520  }
521 
522  for (int i = m + 2; i <= n; i++)
523  {
524  H(i, i - 2) = 0;
525  if (i > m + 2)
526  H(i, i - 3) = 0;
527  }
528 
529  // float QR step involving rows l:n and columns m:n
530  for (int k = m; k < n; k++)
531  {
532  bool notlast = (k != n - 1);
533  if (k != m)
534  {
535  p = H(k, k - 1);
536  q = H(k + 1, k - 1);
537  r = (notlast ? H(k + 2, k - 1) : 0);
538  x = std::fabs(p) + std::fabs(q) + std::fabs(r);
539  if (x != 0)
540  {
541  p = p / x;
542  q = q / x;
543  r = r / x;
544  }
545  }
546 
547  if (x == 0) break;
548 
549  s = static_cast<SCALARTYPE>(std::sqrt(p * p + q * q + r * r));
550  if (p < 0) s = -s;
551 
552  if (s != 0)
553  {
554  if (k != m)
555  H(k, k - 1) = -s * x;
556  else
557  if (l != m)
558  H(k, k - 1) = -H(k, k - 1);
559 
560  p = p + s;
561  y = q / s;
562  z = r / s;
563  x = p / s;
564  q = q / p;
565  r = r / p;
566 
567  buf[5 * k] = x;
568  buf[5 * k + 1] = y;
569  buf[5 * k + 2] = z;
570  buf[5 * k + 3] = q;
571  buf[5 * k + 4] = r;
572 
573 
574  SCALARTYPE* a_row_k = H.row(k);
575  SCALARTYPE* a_row_k_1 = H.row(k + 1);
576  SCALARTYPE* a_row_k_2 = H.row(k + 2);
577  // Row modification
578  for (int j = k; j < nn; j++)
579  {
580  SCALARTYPE h_kj = a_row_k[j];
581  SCALARTYPE h_k1_j = a_row_k_1[j];
582 
583  p = h_kj + q * h_k1_j;
584  if (notlast)
585  {
586  SCALARTYPE h_k2_j = a_row_k_2[j];
587  p = p + r * h_k2_j;
588  a_row_k_2[j] = h_k2_j - p * z;
589  }
590 
591  a_row_k[j] = h_kj - p * x;
592  a_row_k_1[j] = h_k1_j - p * y;
593  }
594 
595  //H(k + 1, nn - 1) = h_kj;
596 
597 
598  // Column modification
599  for (int i = k; i < std::min(nn, k + 4); i++)
600  {
601  p = x * H(i, k) + y * H(i, k + 1);
602  if (notlast)
603  {
604  p = p + z * H(i, k + 2);
605  H(i, k + 2) = H(i, k + 2) - p * r;
606  }
607 
608  H(i, k) = H(i, k) - p;
609  H(i, k + 1) = H(i, k + 1) - p * q;
610  }
611  }
612  else
613  {
614  buf[5 * k] = 0;
615  buf[5 * k + 1] = 0;
616  buf[5 * k + 2] = 0;
617  buf[5 * k + 3] = 0;
618  buf[5 * k + 4] = 0;
619  }
620  }
621 
622  // Timer timer;
623  // timer.start();
624 
625  update_float_QR_column(H, buf, m, n, n, true);
626  update_float_QR_column_gpu(V, buf, buf_vcl, m, n, nn, false);
627 
628  // std::cout << timer.get() << "\n";
629  }
630  }
631 
632  // Backsubstitute to find vectors of upper triangular form
633  if (norm == 0)
634  {
635  return;
636  }
637 
638  for (n = nn - 1; n >= 0; n--)
639  {
640  p = d(n);
641  q = e(n);
642 
643  // Real vector
644  if (q == 0)
645  {
646  int l = n;
647  H(n, n) = 1;
648  for (int i = n - 1; i >= 0; i--)
649  {
650  w = H(i, i) - p;
651  r = 0;
652  for (int j = l; j <= n; j++)
653  r = r + H(i, j) * H(j, n);
654 
655  if (e(i) < 0)
656  {
657  z = w;
658  s = r;
659  }
660  else
661  {
662  l = i;
663  if (e(i) == 0)
664  {
665  H(i, n) = (w != 0) ? (-r / w) : (-r / (eps * norm));
666  }
667  else
668  {
669  // Solve real equations
670  x = H(i, i + 1);
671  y = H(i + 1, i);
672  q = (d(i) - p) * (d(i) - p) + e(i) * e(i);
673  t = (x * s - z * r) / q;
674  H(i, n) = t;
675  H(i + 1, n) = (std::fabs(x) > std::fabs(z)) ? ((-r - w * t) / x) : ((-s - y * t) / z);
676  }
677 
678  // Overflow control
679  t = std::fabs(H(i, n));
680  if ((eps * t) * t > 1)
681  for (int j = i; j <= n; j++)
682  H(j, n) /= t;
683  }
684  }
685  }
686  else if (q < 0)
687  {
688  // Complex vector
689  int l = n - 1;
690 
691  // Last vector component imaginary so matrix is triangular
692  if (std::fabs(H(n, n - 1)) > std::fabs(H(n - 1, n)))
693  {
694  H(n - 1, n - 1) = q / H(n, n - 1);
695  H(n - 1, n) = -(H(n, n) - p) / H(n, n - 1);
696  }
697  else
698  {
699  cdiv<SCALARTYPE>(0, -H(n - 1, n), H(n - 1, n - 1) - p, q, out1, out2);
700 
701  H(n - 1, n - 1) = out1;
702  H(n - 1, n) = out2;
703  }
704 
705  H(n, n - 1) = 0;
706  H(n, n) = 1;
707  for (int i = n - 2; i >= 0; i--)
708  {
709  SCALARTYPE ra, sa, vr, vi;
710  ra = 0;
711  sa = 0;
712  for (int j = l; j <= n; j++)
713  {
714  SCALARTYPE h_ij = H(i, j);
715  ra = ra + h_ij * H(j, n - 1);
716  sa = sa + h_ij * H(j, n);
717  }
718 
719  w = H(i, i) - p;
720 
721  if (e(i) < 0)
722  {
723  z = w;
724  r = ra;
725  s = sa;
726  }
727  else
728  {
729  l = i;
730  if (e(i) == 0)
731  {
732  cdiv<SCALARTYPE>(-ra, -sa, w, q, out1, out2);
733  H(i, n - 1) = out1;
734  H(i, n) = out2;
735  }
736  else
737  {
738  // Solve complex equations
739  x = H(i, i + 1);
740  y = H(i + 1, i);
741  vr = (d(i) - p) * (d(i) - p) + e(i) * e(i) - q * q;
742  vi = (d(i) - p) * 2 * q;
743  if ( (vr == 0) && (vi == 0) )
744  vr = eps * norm * (std::fabs(w) + std::fabs(q) + std::fabs(x) + std::fabs(y) + std::fabs(z));
745 
746  cdiv<SCALARTYPE>(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi, out1, out2);
747 
748  H(i, n - 1) = out1;
749  H(i, n) = out2;
750 
751 
752  if (std::fabs(x) > (std::fabs(z) + std::fabs(q)))
753  {
754  H(i + 1, n - 1) = (-ra - w * H(i, n - 1) + q * H(i, n)) / x;
755  H(i + 1, n) = (-sa - w * H(i, n) - q * H(i, n - 1)) / x;
756  }
757  else
758  {
759  cdiv<SCALARTYPE>(-r - y * H(i, n - 1), -s - y * H(i, n), z, q, out1, out2);
760 
761  H(i + 1, n - 1) = out1;
762  H(i + 1, n) = out2;
763  }
764  }
765 
766  // Overflow control
767  t = std::max(std::fabs(H(i, n - 1)), std::fabs(H(i, n)));
768  if ((eps * t) * t > 1)
769  {
770  for (int j = i; j <= n; j++)
771  {
772  H(j, n - 1) /= t;
773  H(j, n) /= t;
774  }
775  }
776  }
777  }
778  }
779  }
780 
781  viennacl::fast_copy(H.begin(), H.end(), vcl_H);
782  // viennacl::fast_copy(V.begin(), V.end(), vcl_V);
783 
785 
786  V = viennacl::linalg::prod(trans(tmp), vcl_H);
787  }
788 
789  template <typename SCALARTYPE, unsigned int ALIGNMENT>
795  {
796  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
797 
798  if(start + 2 >= A.size1())
799  return false;
800 
801  prepare_householder_vector(A, D, A.size1(), start + 1, start, start + 1, true);
802 
803  {
805 
806  viennacl::ocl::enqueue(kernel(
807  A,
808  D,
809  static_cast<cl_uint>(start + 1),
810  static_cast<cl_uint>(start),
811  static_cast<cl_uint>(A.size1()),
812  static_cast<cl_uint>(A.size2()),
813  static_cast<cl_uint>(A.internal_size2()),
814  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * 4))
815  ));
816  }
817 
818  {
820 
821  viennacl::ocl::enqueue(kernel(
822  A,
823  D,
824  static_cast<cl_uint>(0),
825  static_cast<cl_uint>(0),
826  static_cast<cl_uint>(A.size1()),
827  static_cast<cl_uint>(A.size2()),
828  static_cast<cl_uint>(A.internal_size2()),
829  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
830  ));
831  }
832 
833  {
835 
836  viennacl::ocl::enqueue(kernel(
837  Q,
838  D,
839  static_cast<cl_uint>(A.size1()),
840  static_cast<cl_uint>(A.size2()),
841  static_cast<cl_uint>(Q.internal_size2()),
842  viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE)))
843  ));
844  }
845 
846  return true;
847  }
848 
849  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
852  {
853  vcl_size_t sz = A.size1();
854 
855  viennacl::vector<SCALARTYPE> hh_vector(sz);
856 
857  for(vcl_size_t i = 0; i < sz; i++)
858  {
859  householder_twoside(A, Q, hh_vector, i);
860  }
861 
862  }
863 
864  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
867  boost::numeric::ublas::vector<SCALARTYPE> & D,
868  boost::numeric::ublas::vector<SCALARTYPE> & E,
869  bool is_symmetric = true)
870  {
871  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
872 
873  assert(A.size1() == A.size2() && bool("Input matrix must be square for QR method!"));
874 
875  D.resize(A.size1());
876  E.resize(A.size1());
877 
879 
881 
882  // reduce to tridiagonal form
884 
885  // pack diagonal and super-diagonal
886  // ublas::vector<SCALARTYPE> D(A.size1()), E(A.size1());
887 
888  bidiag_pack(A, D, E);
889 
890  // find eigenvalues
891  if(is_symmetric)
892  {
893 
894  detail::tql2(Q, D, E);
895  transpose(Q);
896  }
897  else
898  {
899  detail::hqr2(A, Q, D, E);
900  }
901 
902  // std::cout << A << "\n";
903 
904  boost::numeric::ublas::matrix<float> eigen_values(A.size1(), A.size1());
905  eigen_values.clear();
906 
907  for (vcl_size_t i = 0; i < A.size1(); i++)
908  {
909  if(std::fabs(E(i)) < EPS)
910  {
911  eigen_values(i, i) = D(i);
912  }
913  else
914  {
915  eigen_values(i, i) = D(i);
916  eigen_values(i, i + 1) = E(i);
917  eigen_values(i + 1, i) = -E(i);
918  eigen_values(i + 1, i + 1) = D(i);
919  i++;
920  }
921  }
922 
923  copy(eigen_values, A);
924  }
925  }
926 
927 
928  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
931  boost::numeric::ublas::vector<SCALARTYPE>& D,
932  boost::numeric::ublas::vector<SCALARTYPE>& E
933  )
934  {
935  detail::qr_method(A, Q, D, E, false);
936  }
937 
938  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
941  boost::numeric::ublas::vector<SCALARTYPE>& D
942  )
943  {
944  boost::numeric::ublas::vector<SCALARTYPE> E(A.size1());
945 
946  detail::qr_method(A, Q, D, E, true);
947  }
948 
949  }
950 }
951 
952 #endif
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Definition: sparse_matrix_operations.hpp:330
void final_iter_update(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:227
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: forwards.h:299
FastMatrix()
Definition: qr-method.hpp:295
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
Definition: qr-method-common.hpp:44
SCALARTYPE * end()
Definition: qr-method.hpp:320
Internal helper class representing a row-major dense matrix used for the QR method for the purpose of...
Definition: qr-method.hpp:292
void tridiagonal_reduction(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q)
Definition: qr-method.hpp:850
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void tql2(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &d, boost::numeric::ublas::vector< SCALARTYPE > &e)
Definition: qr-method.hpp:74
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
void qr_method(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D, boost::numeric::ublas::vector< SCALARTYPE > &E, bool is_symmetric=true)
Definition: qr-method.hpp:865
A dense matrix class.
Definition: forwards.h:293
SCALARTYPE * begin()
Definition: qr-method.hpp:315
void prepare_householder_vector(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
Definition: qr-method-common.hpp:173
bool householder_twoside(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t start)
Definition: qr-method.hpp:790
void hqr2(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &vcl_H, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &V, boost::numeric::ublas::vector< SCALARTYPE > &d, boost::numeric::ublas::vector< SCALARTYPE > &e)
Definition: qr-method.hpp:335
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
FastMatrix(vcl_size_t sz, vcl_size_t internal_size)
Definition: qr-method.hpp:300
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:510
SCALARTYPE & operator()(int i, int j)
Definition: qr-method.hpp:305
Common routines used for the QR method and SVD. Experimental.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:503
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
Definition: qr-method-common.hpp:45
const std::string SVD_FINAL_ITER_UPDATE_KERNEL
Definition: qr-method-common.hpp:53
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
SCALARTYPE * row(int i)
Definition: qr-method.hpp:310
void qr_method_nsm(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D, boost::numeric::ublas::vector< SCALARTYPE > &E)
Definition: qr-method.hpp:929
void final_iter_update_gpu(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:179
std::vector< SCALARTYPE > data
Definition: qr-method.hpp:325
const std::string SVD_GIVENS_NEXT_KERNEL
Definition: qr-method-common.hpp:52
void bidiag_pack(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, VectorType &dh, VectorType &sh)
Definition: qr-method-common.hpp:197
SCALARTYPE pythag(SCALARTYPE a, SCALARTYPE b)
Definition: qr-method-common.hpp:65
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
Definition: qr-method-common.hpp:43
void transpose(MatrixType &A)
Definition: qr-method-common.hpp:107
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
void update_float_QR_column(MatrixT &A, const std::vector< SCALARTYPE > &buf, int m, int n, int last_i, bool is_triangular)
Definition: qr-method.hpp:244
const std::string SVD_UPDATE_QR_COLUMN_KERNEL
Definition: qr-method-common.hpp:54
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void givens_next(MatrixType &matrix, VectorType &tmp1, VectorType &tmp2, int l, int m)
Definition: qr-method.hpp:41
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
void qr_method_sym(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D)
Definition: qr-method.hpp:939
void update_float_QR_column_gpu(MatrixT &A, const std::vector< SCALARTYPE > &buf, viennacl::vector< SCALARTYPE > &buf_vcl, int m, int n, int last_n, bool)
Definition: qr-method.hpp:201
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
STL-like transfer of a GPU vector to the CPU. The cpu type is assumed to reside in a linear piece of ...
Definition: vector.hpp:1262
size_type size1() const
Returns the number of rows.
Definition: matrix.hpp:625