libMesh
dense_matrix_blas_lapack.C
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2017 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 // Local Includes
20 #include "libmesh/dense_matrix.h"
21 #include "libmesh/dense_vector.h"
22 
23 
24 #if (LIBMESH_HAVE_PETSC)
25 # include "libmesh/petsc_macro.h"
26 # include <petscblaslapack.h>
27 #endif
28 
29 namespace libMesh
30 {
31 
32 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
33 
34 template<typename T>
35 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> & other,
36  _BLAS_Multiply_Flag flag)
37 {
38  int result_size = 0;
39 
40  // For each case, determine the size of the final result and make sure
41  // that the inner dimensions match
42  switch (flag)
43  {
44  case LEFT_MULTIPLY:
45  {
46  result_size = other.m() * this->n();
47  if (other.n() == this->m())
48  break;
49  }
50  libmesh_fallthrough();
51  case RIGHT_MULTIPLY:
52  {
53  result_size = other.n() * this->m();
54  if (other.m() == this->n())
55  break;
56  }
57  libmesh_fallthrough();
58  case LEFT_MULTIPLY_TRANSPOSE:
59  {
60  result_size = other.n() * this->n();
61  if (other.m() == this->m())
62  break;
63  }
64  libmesh_fallthrough();
65  case RIGHT_MULTIPLY_TRANSPOSE:
66  {
67  result_size = other.m() * this->m();
68  if (other.n() == this->n())
69  break;
70  }
71  libmesh_fallthrough();
72  default:
73  libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication.");
74  }
75 
76  // For this to work, the passed arg. must actually be a DenseMatrix<T>
77  const DenseMatrix<T> * const_that = cast_ptr<const DenseMatrix<T> *>(&other);
78 
79  // Also, although 'that' is logically const in this BLAS routine,
80  // the PETSc BLAS interface does not specify that any of the inputs are
81  // const. To use it, I must cast away const-ness.
82  DenseMatrix<T> * that = const_cast<DenseMatrix<T> *> (const_that);
83 
84  // Initialize A, B pointers for LEFT_MULTIPLY* cases
85  DenseMatrix<T> * A = this;
86  DenseMatrix<T> * B = that;
87 
88  // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
89  // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
90  // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply"
91  // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply"
92  // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t"
93  // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t"
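 // A minimal worked example of the table above (illustrative only): if *this
 // is 2x3 and 'other' is 3x4 with flag == RIGHT_MULTIPLY, we want the 2x4
 // product (*this)*other. The row-major buffers look to Fortran like the
 // column-major matrices (*this)^T (3x2) and other^T (4x3). Passing (other,
 // *this) as the first and second arguments makes gemm compute
 // other^T * (*this)^T = ((*this)*other)^T in column-major storage, which
 // reads back as (*this)*other in row-major order.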
94  if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
95  std::swap(A,B);
96 
97  // transa, transb values to pass to blas
98  char
99  transa[] = "n",
100  transb[] = "n";
101 
102  // Integer values to pass to BLAS:
103  //
104  // M
105  // In Fortran, the number of rows of op(A),
106  // In the BLAS documentation, typically known as 'M'.
107  //
108  // In C/C++, we set:
109  // M = n_cols(A) if (transa='n')
110  // n_rows(A) if (transa='t')
111  PetscBLASInt M = static_cast<PetscBLASInt>( A->n() );
112 
113  // N
114  // In Fortran, the number of cols of op(B), and also the number of cols of C.
115  // In the BLAS documentation, typically known as 'N'.
116  //
117  // In C/C++, we set:
118  // N = n_rows(B) if (transb='n')
119  // n_cols(B) if (transb='t')
120  PetscBLASInt N = static_cast<PetscBLASInt>( B->m() );
121 
122  // K
123  // In Fortran, the number of cols of op(A), and also
124  // the number of rows of op(B). In the BLAS documentation,
125  // typically known as 'K'.
126  //
127  // In C/C++, we set:
128  // K = n_rows(A) if (transa='n')
129  // n_cols(A) if (transa='t')
130  PetscBLASInt K = static_cast<PetscBLASInt>( A->m() );
131 
132  // LDA (leading dimension of A). In our cases,
133  // LDA is always the number of columns of A.
134  PetscBLASInt LDA = static_cast<PetscBLASInt>( A->n() );
135 
136  // LDB (leading dimension of B). In our cases,
137  // LDB is always the number of columns of B.
138  PetscBLASInt LDB = static_cast<PetscBLASInt>( B->n() );
139 
140  if (flag == LEFT_MULTIPLY_TRANSPOSE)
141  {
142  transb[0] = 't';
143  N = static_cast<PetscBLASInt>( B->n() );
144  }
145 
146  else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
147  {
148  transa[0] = 't';
149  std::swap(M,K);
150  }
151 
152  // LDC (leading dimension of C). LDC is the
153  // number of columns in the solution matrix.
154  PetscBLASInt LDC = M;
155 
156  // Scalar values to pass to BLAS
157  //
158  // scalar multiplying the whole product AB
159  T alpha = 1.;
160 
161  // scalar multiplying C, which is the original matrix.
162  T beta = 0.;
163 
164  // Storage for the result
165  std::vector<T> result (result_size);
166 
167  // Finally ready to call the BLAS
168  BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC);
169 
170  // Update the relevant dimension for this matrix.
171  switch (flag)
172  {
173  case LEFT_MULTIPLY: { this->_m = other.m(); break; }
174  case RIGHT_MULTIPLY: { this->_n = other.n(); break; }
175  case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; }
176  case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
177  default:
178  libmesh_error_msg("Unknown flag selected.");
179  }
180 
181  // Swap my data vector with the result
182  this->_val.swap(result);
183 }
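// Usage sketch (assumes the usual DenseMatrix public interface; the exact
// dispatch is not shown in this file):
//
//   DenseMatrix<Real> A(2,3), B(3,4);
//   // ... fill A and B ...
//   A.right_multiply(B);  // conceptually A <- A*B; with PETSc BLAS available
//                         // this is expected to reach _multiply_blas(B, RIGHT_MULTIPLY)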
184 
185 #else
186 
187 template<typename T>
188 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> &,
189  _BLAS_Multiply_Flag)
190 {
191  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
192 }
193 
194 #endif
195 
196 
197 
198 
199 
200 
201 
202 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
203 
204 template<typename T>
205 void DenseMatrix<T>::_lu_decompose_lapack ()
206 {
207  // If this function was called, there better not be any
208  // previous decomposition of the matrix.
209  libmesh_assert_equal_to (this->_decomposition_type, NONE);
210 
211  // The calling sequence for dgetrf is:
212  // dgetrf(M, N, A, lda, ipiv, info)
213 
214  // M (input)
215  // The number of rows of the matrix A. M >= 0.
216  // In C/C++, pass the number of *cols* of A
217  PetscBLASInt M = this->n();
218 
219  // N (input)
220  // The number of columns of the matrix A. N >= 0.
221  // In C/C++, pass the number of *rows* of A
222  PetscBLASInt N = this->m();
223 
224  // A (input/output) double precision array, dimension (LDA,N)
225  // On entry, the M-by-N matrix to be factored.
226  // On exit, the factors L and U from the factorization
227  // A = P*L*U; the unit diagonal elements of L are not stored.
228  // Here, we pass &(_val[0]).
229 
230  // LDA (input)
231  // The leading dimension of the array A. LDA >= max(1,M).
232  PetscBLASInt LDA = M;
233 
234  // ipiv (output) integer array, dimension (min(m,n))
235  // The pivot indices; for 1 <= i <= min(m,n), row i of the
236  // matrix was interchanged with row IPIV(i).
237  // Here, we pass &(_pivots[0]), a private class member used to store pivots
238  this->_pivots.resize( std::min(M,N) );
239 
240  // info (output)
241  // = 0: successful exit
242  // < 0: if INFO = -i, the i-th argument had an illegal value
243  // > 0: if INFO = i, U(i,i) is exactly zero. The factorization
244  // has been completed, but the factor U is exactly
245  // singular, and division by zero will occur if it is used
246  // to solve a system of equations.
247  PetscBLASInt INFO = 0;
248 
249  // Ready to call the actual factorization routine through PETSc's interface
250  LAPACKgetrf_(&M, &N, &(this->_val[0]), &LDA, &(_pivots[0]), &INFO);
251 
252  // Check return value for errors
253  if (INFO != 0)
254  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU factorization!");
255 
256  // Set the flag for LU decomposition
257  this->_decomposition_type = LU_BLAS_LAPACK;
258 }
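// Note: because the row-major buffer is handed to getrf unchanged, what
// Lapack actually factors is A^T = P*L*U. This is consistent as long as the
// companion routine _lu_back_substitute_lapack() requests the transposed
// solve (TRANS='t') from getrs, since (A^T)^T x = A x = b.
//
// Usage sketch (assumes the usual DenseMatrix public interface):
//
//   DenseMatrix<Real> A(n,n);
//   DenseVector<Real> b(n), x(n);
//   // ... fill A and b ...
//   A.lu_solve(b, x);  // expected to call _lu_decompose_lapack() and then
//                      // _lu_back_substitute_lapack(b, x) when PETSc BLAS/LAPACK is used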
259 
260 #else
261 
262 template<typename T>
263 void DenseMatrix<T>::_lu_decompose_lapack ()
264 {
265  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
266 }
267 
268 #endif
269 
270 
271 
272 template<typename T>
273 void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma)
274 {
275  // The calling sequence for dgesvd is:
276  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
277 
278  // JOBU (input)
279  // Specifies options for computing all or part of the matrix U:
280  // = 'A': all M columns of U are returned in array U:
281  // = 'S': the first min(m,n) columns of U (the left singular
282  // vectors) are returned in the array U;
283  // = 'O': the first min(m,n) columns of U (the left singular
284  // vectors) are overwritten on the array A;
285  // = 'N': no columns of U (no left singular vectors) are
286  // computed.
287  char JOBU = 'N';
288 
289  // JOBVT (input)
290  // Specifies options for computing all or part of the matrix
291  // V**T:
292  // = 'A': all N rows of V**T are returned in the array VT;
293  // = 'S': the first min(m,n) rows of V**T (the right singular
294  // vectors) are returned in the array VT;
295  // = 'O': the first min(m,n) rows of V**T (the right singular
296  // vectors) are overwritten on the array A;
297  // = 'N': no rows of V**T (no right singular vectors) are
298  // computed.
299  char JOBVT = 'N';
300 
301  std::vector<Real> sigma_val;
302  std::vector<Number> U_val;
303  std::vector<Number> VT_val;
304 
305  _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
306 
307  // Copy the singular values into sigma, ignore U_val and VT_val
308  sigma.resize(cast_int<unsigned int>(sigma_val.size()));
309  for (unsigned int i=0; i<sigma.size(); i++)
310  sigma(i) = sigma_val[i];
311 
312 }
313 
314 template<typename T>
315 void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma,
316  DenseMatrix<Number> & U,
317  DenseMatrix<Number> & VT)
318 {
319  // The calling sequence for dgesvd is:
320  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
321 
322  // JOBU (input)
323  // Specifies options for computing all or part of the matrix U:
324  // = 'A': all M columns of U are returned in array U:
325  // = 'S': the first min(m,n) columns of U (the left singular
326  // vectors) are returned in the array U;
327  // = 'O': the first min(m,n) columns of U (the left singular
328  // vectors) are overwritten on the array A;
329  // = 'N': no columns of U (no left singular vectors) are
330  // computed.
331  char JOBU = 'S';
332 
333  // JOBVT (input)
334  // Specifies options for computing all or part of the matrix
335  // V**T:
336  // = 'A': all N rows of V**T are returned in the array VT;
337  // = 'S': the first min(m,n) rows of V**T (the right singular
338  // vectors) are returned in the array VT;
339  // = 'O': the first min(m,n) rows of V**T (the right singular
340  // vectors) are overwritten on the array A;
341  // = 'N': no rows of V**T (no right singular vectors) are
342  // computed.
343  char JOBVT = 'S';
344 
345  // Note: Lapack is going to compute the singular values of A^T. If
346  // A=U * S * V^T, then A^T = V * S * U^T, which means that the
347  // values returned in the "U_val" array actually correspond to the
348  // entries of the V matrix, and the values returned in the VT_val
349  // array actually correspond to the entries of U^T. Therefore, we
350  // pass VT in the place of U and U in the place of VT below!
351  std::vector<Real> sigma_val;
352  int M = this->n();
353  int N = this->m();
354  int min_MN = (M < N) ? M : N;
355 
356  // Size user-provided storage appropriately. Inside svd_helper:
357  // U_val is sized to (M x min_MN)
358  // VT_val is sized to (min_MN x N)
359  // So, we set up U to have the shape of "VT_val^T", and VT to
360  // have the shape of "U_val^T".
361  //
362  // Finally, since the results are stored in column-major order by
363  // Lapack, but we actually want the transpose of what Lapack
364  // returns, this means (conveniently) that we don't even have to
365  // copy anything after the call to _svd_helper, it should already be
366  // in the correct order!
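 // Concrete illustration: if *this is 5x3 (m=5, n=3), then M=3, N=5,
 // min_MN=3. Lapack sees the 3x5 matrix A^T and returns a 3x3 "U_val"
 // (which is our V) and a 3x5 "VT_val" (which is our U^T). Sizing U as
 // (N x min_MN) = 5x3 and VT as (min_MN x M) = 3x3 therefore lets the
 // column-major Lapack output be read back directly as row-major U and VT.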
367  U.resize(N, min_MN);
368  VT.resize(min_MN, M);
369 
370  _svd_helper(JOBU, JOBVT, sigma_val, VT.get_values(), U.get_values());
371 
372  // Copy the singular values into sigma.
373  sigma.resize(cast_int<unsigned int>(sigma_val.size()));
374  for (unsigned int i=0; i<sigma.size(); i++)
375  sigma(i) = sigma_val[i];
376 }
377 
378 #if (LIBMESH_HAVE_PETSC)
379 
380 template<typename T>
381 void DenseMatrix<T>::_svd_helper (char JOBU,
382  char JOBVT,
383  std::vector<Real> & sigma_val,
384  std::vector<Number> & U_val,
385  std::vector<Number> & VT_val)
386 {
387 
388  // M (input)
389  // The number of rows of the matrix A. M >= 0.
390  // In C/C++, pass the number of *cols* of A
391  PetscBLASInt M = this->n();
392 
393  // N (input)
394  // The number of columns of the matrix A. N >= 0.
395  // In C/C++, pass the number of *rows* of A
396  PetscBLASInt N = this->m();
397 
398  PetscBLASInt min_MN = (M < N) ? M : N;
399  PetscBLASInt max_MN = (M > N) ? M : N;
400 
401  // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
402  // On entry, the M-by-N matrix A.
403  // On exit,
404  // if JOBU = 'O', A is overwritten with the first min(m,n)
405  // columns of U (the left singular vectors,
406  // stored columnwise);
407  // if JOBVT = 'O', A is overwritten with the first min(m,n)
408  // rows of V**T (the right singular vectors,
409  // stored rowwise);
410  // if JOBU != 'O' and JOBVT != 'O', the contents of A are destroyed.
411  // Here, we pass &(_val[0]).
412 
413  // LDA (input)
414  // The leading dimension of the array A. LDA >= max(1,M).
415  PetscBLASInt LDA = M;
416 
417  // S (output) DOUBLE PRECISION array, dimension (min(M,N))
418  // The singular values of A, sorted so that S(i) >= S(i+1).
419  sigma_val.resize( min_MN );
420 
421  // LDU (input)
422  // The leading dimension of the array U. LDU >= 1; if
423  // JOBU = 'S' or 'A', LDU >= M.
424  PetscBLASInt LDU = M;
425 
426  // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
427  // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
428  // If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
429  // if JOBU = 'S', U contains the first min(m,n) columns of U
430  // (the left singular vectors, stored columnwise);
431  // if JOBU = 'N' or 'O', U is not referenced.
432  if (JOBU == 'S')
433  U_val.resize( LDU*min_MN );
434  else
435  U_val.resize( LDU*M );
436 
437  // LDVT (input)
438  // The leading dimension of the array VT. LDVT >= 1; if
439  // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
440  PetscBLASInt LDVT = N;
441  if (JOBVT == 'S')
442  LDVT = min_MN;
443 
444  // VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
445  // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
446  // V**T;
447  // if JOBVT = 'S', VT contains the first min(m,n) rows of
448  // V**T (the right singular vectors, stored rowwise);
449  // if JOBVT = 'N' or 'O', VT is not referenced.
450  VT_val.resize( LDVT*N );
451 
452  // LWORK (input)
453  // The dimension of the array WORK.
454  // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
455  // For good performance, LWORK should generally be larger.
456  //
457  // If LWORK = -1, then a workspace query is assumed; the routine
458  // only calculates the optimal size of the WORK array, returns
459  // this value as the first entry of the WORK array, and no error
460  // message related to LWORK is issued by XERBLA.
461  PetscBLASInt larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
462  PetscBLASInt LWORK = (larger > 1) ? larger : 1;
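 // An alternative (sketch only, not used here; real-valued branch shown) is
 // the LWORK=-1 workspace query mentioned above, which asks gesvd for its
 // preferred workspace size before the real call:
 //
 //   PetscBLASInt query_lwork = -1, query_info = 0;
 //   std::vector<T> wkopt(1);
 //   LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]),
 //                &(U_val[0]), &LDU, &(VT_val[0]), &LDVT, &(wkopt[0]),
 //                &query_lwork, &query_info);
 //   LWORK = static_cast<PetscBLASInt>(wkopt[0]);
 //
 // The fixed lower-bound formula above is what this file relies on instead.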
463 
464 
465  // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
466  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
467  // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
468  // superdiagonal elements of an upper bidiagonal matrix B
469  // whose diagonal is in S (not necessarily sorted). B
470  // satisfies A = U * B * VT, so it has the same singular values
471  // as A, and singular vectors related by U and VT.
472  std::vector<Number> WORK( LWORK );
473 
474  // INFO (output)
475  // = 0: successful exit.
476  // < 0: if INFO = -i, the i-th argument had an illegal value.
477  // > 0: if DBDSQR did not converge, INFO specifies how many
478  // superdiagonals of an intermediate bidiagonal form B
479  // did not converge to zero. See the description of WORK
480  // above for details.
481  PetscBLASInt INFO = 0;
482 
483  // Ready to call the actual factorization routine through PETSc's interface.
484 #ifdef LIBMESH_USE_REAL_NUMBERS
485  // Note that the call to LAPACKgesvd_ may modify _val
486  LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]), &(U_val[0]),
487  &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &INFO);
488 #else
489  // When we have LIBMESH_USE_COMPLEX_NUMBERS then we must pass an array of Complex
490  // numbers to LAPACKgesvd_, but _val may contain Reals so we copy to Number below to
491  // handle both the real-valued and complex-valued cases.
492  std::vector<Number> val_copy(_val.size());
493  for (std::size_t i=0; i<_val.size(); i++)
494  val_copy[i] = _val[i];
495 
496  std::vector<Real> RWORK(5 * min_MN);
497  LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(val_copy[0]), &LDA, &(sigma_val[0]), &(U_val[0]),
498  &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &(RWORK[0]), &INFO);
499 #endif
500 
501  // Check return value for errors
502  if (INFO != 0)
503  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack SVD calculation!");
504 }
505 
506 
507 #else
508 
509 template<typename T>
510 void DenseMatrix<T>::_svd_helper (char,
511  char,
512  std::vector<Real> &,
513  std::vector<Number> &,
514  std::vector<Number> &)
515 {
516  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
517 }
518 
519 #endif
520 
521 
522 
523 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
524 #if !PETSC_VERSION_LESS_THAN(3,1,0)
525 
526 template<typename T>
527 void DenseMatrix<T>::_svd_solve_lapack (const DenseVector<T> & rhs,
528  DenseVector<T> & x,
529  Real rcond) const
530 {
531  // Since BLAS is expecting column-major storage, we first need to
532  // make a transposed copy of *this, then pass it to the gelss
533  // routine instead of the original. This extra copy is kind of a
534  // bummer; it might be better if we could use the full SVD to
535  // compute the least-squares solution instead... Note that it isn't
536  // completely terrible either, since A_trans gets overwritten by
537  // Lapack, and we usually would end up making a copy of A outside
538  // the function call anyway.
539  DenseMatrix<T> A_trans;
540  this->get_transpose(A_trans);
541 
542  // M
543  // The number of rows of the input matrix. M >= 0.
544  // This is actually the number of *columns* of A_trans.
545  PetscBLASInt M = A_trans.n();
546 
547  // N
548  // The number of columns of the matrix A. N >= 0.
549  // This is actually the number of *rows* of A_trans.
550  PetscBLASInt N = A_trans.m();
551 
552  // We'll use the min and max of (M,N) several times below.
553  PetscBLASInt max_MN = std::max(M,N);
554  PetscBLASInt min_MN = std::min(M,N);
555 
556  // NRHS
557  // The number of right hand sides, i.e., the number of columns
558  // of the matrices B and X. NRHS >= 0.
559  // This could later be generalized to solve for multiple right-hand
560  // sides...
561  PetscBLASInt NRHS = 1;
562 
563  // A is double precision array, dimension (LDA,N)
564  // On entry, the M-by-N matrix A.
565  // On exit, the first min(m,n) rows of A are overwritten with
566  // its right singular vectors, stored rowwise.
567  //
568  // The data vector that will be passed to Lapack.
569  std::vector<T> & A_trans_vals = A_trans.get_values();
570 
571  // LDA
572  // The leading dimension of the array A. LDA >= max(1,M).
573  PetscBLASInt LDA = M;
574 
575  // B is double precision array, dimension (LDB,NRHS)
576  // On entry, the M-by-NRHS right hand side matrix B.
577  // On exit, B is overwritten by the N-by-NRHS solution
578  // matrix X. If m >= n and RANK = n, the residual
579  // sum-of-squares for the solution in the i-th column is given
580  // by the sum of squares of elements n+1:m in that column.
581  //
582  // Since we don't want the user's rhs vector to be overwritten by
583  // the solution, we copy the rhs values into the solution vector "x"
584  // now. x needs to be long enough to hold both the (Nx1) solution
585  // vector or the (Mx1) rhs, so size it to the max of those.
586  x.resize(max_MN);
587  for (unsigned i=0; i<rhs.size(); ++i)
588  x(i) = rhs(i);
589 
590  // Make the syntax below simpler by grabbing a reference to this array.
591  std::vector<T> & B = x.get_values();
592 
593  // LDB
594  // The leading dimension of the array B. LDB >= max(1,max(M,N)).
595  PetscBLASInt LDB = x.size();
596 
597  // S is double precision array, dimension (min(M,N))
598  // The singular values of A in decreasing order.
599  // The condition number of A in the 2-norm = S(1)/S(min(m,n)).
600  std::vector<T> S(min_MN);
601 
602  // RCOND
603  // Used to determine the effective rank of A. Singular values
604  // S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine
605  // precision is used instead.
606  Real RCOND = rcond;
607 
608  // RANK
609  // The effective rank of A, i.e., the number of singular values
610  // which are greater than RCOND*S(1).
611  PetscBLASInt RANK = 0;
612 
613  // LWORK
614  // The dimension of the array WORK. LWORK >= 1, and also:
615  // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
616  // For good performance, LWORK should generally be larger.
617  //
618  // If LWORK = -1, then a workspace query is assumed; the routine
619  // only calculates the optimal size of the WORK array, returns
620  // this value as the first entry of the WORK array, and no error
621  // message related to LWORK is issued by XERBLA.
622  //
623  // The factor of 1.5 is arbitrary and is used to satisfy the "should
624  // generally be larger" clause.
625  PetscBLASInt LWORK = 1.5 * (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS)));
626 
627  // WORK is double precision array, dimension (MAX(1,LWORK))
628  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
629  std::vector<T> WORK(LWORK);
630 
631  // INFO
632  // = 0: successful exit
633  // < 0: if INFO = -i, the i-th argument had an illegal value.
634  // > 0: the algorithm for computing the SVD failed to converge;
635  // if INFO = i, i off-diagonal elements of an intermediate
636  // bidiagonal form did not converge to zero.
637  PetscBLASInt INFO = 0;
638 
639  // LAPACKgelss_(const PetscBLASInt *, // M
640  // const PetscBLASInt *, // N
641  // const PetscBLASInt *, // NRHS
642  // PetscScalar *, // A
643  // const PetscBLASInt *, // LDA
644  // PetscScalar *, // B
645  // const PetscBLASInt *, // LDB
646  // PetscReal *, // S(out) = singular values of A in decreasing order
647  // const PetscReal *, // RCOND = tolerance for singular values
648  // PetscBLASInt *, // RANK(out) = number of "non-zero" singular values
649  // PetscScalar *, // WORK
650  // const PetscBLASInt *, // LWORK
651  // PetscBLASInt *); // INFO
652  LAPACKgelss_(&M, &N, &NRHS, &A_trans_vals[0], &LDA, &B[0], &LDB, &S[0], &RCOND, &RANK, &WORK[0], &LWORK, &INFO);
653 
654  // Check for errors in the Lapack call
655  if (INFO < 0)
656  libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
657  if (INFO > 0)
658  libmesh_error_msg("The algorithm for computing the SVD failed to converge!");
659 
660  // Debugging: print singular values and information about condition number:
661  // libMesh::err << "RCOND=" << RCOND << std::endl;
662  // libMesh::err << "Singular values: " << std::endl;
663  // for (std::size_t i=0; i<S.size(); ++i)
664  // libMesh::err << S[i] << std::endl;
665  // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;
666 
667  // Lapack has already written the solution into B, but it will be
668  // the wrong size for non-square problems, so we need to resize it
669  // correctly. The size of the solution vector should be the number
670  // of columns of the original A matrix. Unfortunately, resizing a
671  // DenseVector currently also zeros it out (unlike a std::vector) so
672  // we'll resize the underlying storage directly (the size is not
673  // stored independently elsewhere).
674  x.get_values().resize(this->n());
675 }
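// Usage sketch (assumes the usual DenseMatrix public interface):
//
//   DenseMatrix<Real> A(m,n);
//   DenseVector<Real> b(m), x;
//   // ... fill A and b ...
//   A.svd_solve(b, x);  // minimum-norm least-squares solution of A*x = b,
//                       // expected to forward to this helper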
676 
677 #else
678 
679 template<typename T>
680 void DenseMatrix<T>::_svd_solve_lapack (const DenseVector<T> & /*rhs*/,
681  DenseVector<T> & /*x*/,
682  Real /*rcond*/) const
683 {
684  libmesh_error_msg("svd_solve() requires PETSc >= 3.1!");
685 }
686 
687 #endif // !PETSC_VERSION_LESS_THAN(3,1,0)
688 
689 #else
690 template<typename T>
691 void DenseMatrix<T>::_svd_solve_lapack (const DenseVector<T> & /*rhs*/,
692  DenseVector<T> & /*x*/,
693  Real /*rcond*/) const
694 {
695  libmesh_not_implemented();
696 }
697 #endif // (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
698 
699 
700 
701 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
702 
703 template<typename T>
704 void DenseMatrix<T>::_evd_lapack (DenseVector<T> & lambda_real,
705  DenseVector<T> & lambda_imag,
706  DenseMatrix<T> * VL,
707  DenseMatrix<T> * VR)
708 {
709  // This algorithm only works for square matrices, so verify this up front.
710  if (this->m() != this->n())
711  libmesh_error_msg("Can only compute eigen-decompositions for square matrices.");
712 
713  // If the user requests left or right eigenvectors, we have to make
714  // sure and pass the transpose of this matrix to Lapack, otherwise
715  // it will compute the inverse transpose of what we are
716  // after... since we know the matrix is square, we can just swap
717  // entries in place. If the user does not request eigenvectors, we
718  // can skip this extra step, since the eigenvalues for A and A^T are
719  // the same.
720  if (VL || VR)
721  {
722  for (unsigned int i=0; i<this->_m; ++i)
723  for (unsigned int j=0; j<i; ++j)
724  std::swap((*this)(i,j), (*this)(j,i));
725  }
726 
727  // The calling sequence for dgeev is:
728  // DGEEV (character JOBVL,
729  // character JOBVR,
730  // integer N,
731  // double precision, dimension( lda, * ) A,
732  // integer LDA,
733  // double precision, dimension( * ) WR,
734  // double precision, dimension( * ) WI,
735  // double precision, dimension( ldvl, * ) VL,
736  // integer LDVL,
737  // double precision, dimension( ldvr, * ) VR,
738  // integer LDVR,
739  // double precision, dimension( * ) WORK,
740  // integer LWORK,
741  // integer INFO)
742 
743  // JOBVL (input)
744  // = 'N': left eigenvectors of A are not computed;
745  // = 'V': left eigenvectors of A are computed.
746  char JOBVL = VL ? 'V' : 'N';
747 
748  // JOBVR (input)
749  // = 'N': right eigenvectors of A are not computed;
750  // = 'V': right eigenvectors of A are computed.
751  char JOBVR = VR ? 'V' : 'N';
752 
753  // N (input)
754  // The number of rows/cols of the matrix A. N >= 0.
755  PetscBLASInt N = this->m();
756 
757  // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
758  // On entry, the N-by-N matrix A.
759  // On exit, A has been overwritten.
760  // Here, we pass &(_val[0]).
761 
762  // LDA (input)
763  // The leading dimension of the array A. LDA >= max(1,N).
764  PetscBLASInt LDA = N;
765 
766  // WR (output) double precision array, dimension (N)
767  // WI (output) double precision array, dimension (N)
768  // WR and WI contain the real and imaginary parts,
769  // respectively, of the computed eigenvalues. Complex
770  // conjugate pairs of eigenvalues appear consecutively
771  // with the eigenvalue having the positive imaginary part
772  // first.
773  lambda_real.resize(N);
774  lambda_imag.resize(N);
775 
776  // VL (output) double precision array, dimension (LDVL,N)
777  // If JOBVL = 'V', the left eigenvectors u(j) are stored one
778  // after another in the columns of VL, in the same order
779  // as their eigenvalues.
780  // If JOBVL = 'N', VL is not referenced.
781  // If the j-th eigenvalue is real, then u(j) = VL(:,j),
782  // the j-th column of VL.
783  // If the j-th and (j+1)-st eigenvalues form a complex
784  // conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
785  // u(j+1) = VL(:,j) - i*VL(:,j+1).
786  // Will be set below if needed.
787 
788  // LDVL (input)
789  // The leading dimension of the array VL. LDVL >= 1; if
790  // JOBVL = 'V', LDVL >= N.
791  PetscBLASInt LDVL = VL ? N : 1;
792 
793  // VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
794  // If JOBVR = 'V', the right eigenvectors v(j) are stored one
795  // after another in the columns of VR, in the same order
796  // as their eigenvalues.
797  // If JOBVR = 'N', VR is not referenced.
798  // If the j-th eigenvalue is real, then v(j) = VR(:,j),
799  // the j-th column of VR.
800  // If the j-th and (j+1)-st eigenvalues form a complex
801  // conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
802  // v(j+1) = VR(:,j) - i*VR(:,j+1).
803  // Will be set below if needed.
804 
805  // LDVR (input)
806  // The leading dimension of the array VR. LDVR >= 1; if
807  // JOBVR = 'V', LDVR >= N.
808  PetscBLASInt LDVR = VR ? N : 1;
809 
810  // WORK (workspace/output) double precision array, dimension (MAX(1,LWORK))
811  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
812  //
813  // LWORK (input)
814  // The dimension of the array WORK. LWORK >= max(1,3*N), and
815  // if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good
816  // performance, LWORK must generally be larger.
817  //
818  // If LWORK = -1, then a workspace query is assumed; the routine
819  // only calculates the optimal size of the WORK array, returns
820  // this value as the first entry of the WORK array, and no error
821  // message related to LWORK is issued by XERBLA.
822  PetscBLASInt LWORK = (VR || VL) ? 4*N : 3*N;
823  std::vector<T> WORK(LWORK);
824 
825  // INFO (output)
826  // = 0: successful exit
827  // < 0: if INFO = -i, the i-th argument had an illegal value.
828  // > 0: if INFO = i, the QR algorithm failed to compute all the
829  // eigenvalues, and no eigenvectors or condition numbers
830  // have been computed; elements 1:ILO-1 and i+1:N of WR
831  // and WI contain eigenvalues which have converged.
832  PetscBLASInt INFO = 0;
833 
834  // Get references to raw data
835  std::vector<T> & lambda_real_val = lambda_real.get_values();
836  std::vector<T> & lambda_imag_val = lambda_imag.get_values();
837 
838  // Set up eigenvector storage if necessary.
839  T * VR_ptr = libmesh_nullptr;
840  if (VR)
841  {
842  VR->resize(N, N);
843  VR_ptr = &(VR->get_values()[0]);
844  }
845 
846  T * VL_ptr = libmesh_nullptr;
847  if (VL)
848  {
849  VL->resize(N, N);
850  VL_ptr = &(VL->get_values()[0]);
851  }
852 
853  // Ready to call the Lapack routine through PETSc's interface
854  LAPACKgeev_(&JOBVL,
855  &JOBVR,
856  &N,
857  &(_val[0]),
858  &LDA,
859  &lambda_real_val[0],
860  &lambda_imag_val[0],
861  VL_ptr,
862  &LDVL,
863  VR_ptr,
864  &LDVR,
865  &WORK[0],
866  &LWORK,
867  &INFO);
868 
869  // Check return value for errors
870  if (INFO != 0)
871  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
872 
873  // If the user requested either right or left eigenvectors, LAPACK
874  // has now computed the transpose of the desired matrix, i.e. V^T
875  // instead of V. We could leave this up to user code to handle, but
876  // rather than risking getting very unexpected results, we'll just
877  // transpose it in place before handing it back.
878  if (VR)
879  {
880  for (unsigned int i=0; i<static_cast<unsigned int>(N); ++i)
881  for (unsigned int j=0; j<i; ++j)
882  std::swap((*VR)(i,j), (*VR)(j,i));
883  }
884 
885  if (VL)
886  {
887  for (unsigned int i=0; i<static_cast<unsigned int>(N); ++i)
888  for (unsigned int j=0; j<i; ++j)
889  std::swap((*VL)(i,j), (*VL)(j,i));
890  }
891 }
892 
893 #else
894 
895 template<typename T>
896 void DenseMatrix<T>::_evd_lapack (DenseVector<T> &,
897  DenseVector<T> &,
898  DenseMatrix<T> *,
899  DenseMatrix<T> *)
900 {
901  libmesh_error_msg("_evd_lapack is currently only available when LIBMESH_USE_REAL_NUMBERS is defined!");
902 }
903 
904 #endif
905 
906 
907 
908 
909 
910 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
911 
912 template<typename T>
913 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> & b,
914  DenseVector<T> & x)
915 {
916  // The calling sequence for getrs is:
917  // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
918 
919  // trans (input)
920  // 'n' for no transpose, 't' for transpose
921  char TRANS[] = "t";
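 // 't' is used here because _lu_decompose_lapack() factored the row-major
 // buffer, which Lapack interpreted as A^T; solving A x = b therefore means
 // asking getrs for the solve with the transpose of the stored factorization.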
922 
923  // N (input)
924  // The order of the matrix A. N >= 0.
925  PetscBLASInt N = this->m();
926 
927 
928  // NRHS (input)
929  // The number of right hand sides, i.e., the number of columns
930  // of the matrix B. NRHS >= 0.
931  PetscBLASInt NRHS = 1;
932 
933  // A (input) double precision array, dimension (LDA,N)
934  // The factors L and U from the factorization A = P*L*U
935  // as computed by dgetrf.
936  // Here, we pass &(_val[0])
937 
938  // LDA (input)
939  // The leading dimension of the array A. LDA >= max(1,N).
940  PetscBLASInt LDA = N;
941 
942  // ipiv (input) int array, dimension (N)
943  // The pivot indices from DGETRF; for 1<=i<=N, row i of the
944  // matrix was interchanged with row IPIV(i).
945  // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack
946 
947  // B (input/output) double precision array, dimension (LDB,NRHS)
948  // On entry, the right hand side matrix B.
949  // On exit, the solution matrix X.
950  // Here, we pass a copy of the rhs vector's data array in x, so that the
951  // passed right-hand side b is unmodified. I don't see a way around this
952  // copy if we want to maintain an unmodified rhs in LibMesh.
953  x = b;
954  std::vector<T> & x_vec = x.get_values();
955 
956  // We can avoid the copy if we don't care about overwriting the RHS: just
957  // pass b to the Lapack routine and then swap with x before exiting
958  // std::vector<T> & x_vec = b.get_values();
959 
960  // LDB (input)
961  // The leading dimension of the array B. LDB >= max(1,N).
962  PetscBLASInt LDB = N;
963 
964  // INFO (output)
965  // = 0: successful exit
966  // < 0: if INFO = -i, the i-th argument had an illegal value
967  PetscBLASInt INFO = 0;
968 
969  // Finally, ready to call the Lapack getrs function
970  LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO);
971 
972  // Check return value for errors
973  if (INFO != 0)
974  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU solve!");
975 
976  // Don't do this if you already made a copy of b above
977  // Swap b and x. The solution will then be in x, and whatever was originally
978  // in x, maybe garbage, maybe nothing, will be in b.
979  // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
980  // the input. This *should* make user code simpler, as they don't have to create
981  // an extra vector just to pass it in to the solve function!
982  // b.swap(x);
983 }
984 
985 #else
986 
987 template<typename T>
988 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> &,
989  DenseVector<T> &)
990 {
991  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
992 }
993 
994 #endif
995 
996 
997 
998 
999 
1000 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
1001 
1002 template<typename T>
1003 void DenseMatrix<T>::_matvec_blas (T alpha,
1004  T beta,
1005  DenseVector<T> & dest,
1006  const DenseVector<T> & arg,
1007  bool trans) const
1008 {
1009  // Ensure that dest and arg sizes are compatible
1010  if (!trans)
1011  {
1012  // dest ~ A * arg
1013  // (mx1) (mxn) * (nx1)
1014  if ((dest.size() != this->m()) || (arg.size() != this->n()))
1015  libmesh_error_msg("Improper input argument sizes!");
1016  }
1017 
1018  else // trans == true
1019  {
1020  // Ensure that dest and arg are proper size
1021  // dest ~ A^T * arg
1022  // (nx1) (nxm) * (mx1)
1023  if ((dest.size() != this->n()) || (arg.size() != this->m()))
1024  libmesh_error_msg("Improper input argument sizes!");
1025  }
1026 
1027  // Calling sequence for dgemv:
1028  //
1029  // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
1030 
1031  // TRANS (input)
1032  // 't' for transpose, 'n' for non-transpose multiply
1033  // We store everything in row-major order, so pass the transpose flag for
1034  // non-transposed matvecs and the 'n' flag for transposed matvecs
1035  char TRANS[] = "t";
1036  if (trans)
1037  TRANS[0] = 'n';
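 // Worked example (illustrative): for a 2x3 (*this) with trans=false we call
 // gemv with TRANS='t', M=3, N=2. Blas sees the row-major buffer as the 3x2
 // matrix (*this)^T, and op(A)=A^T recovers (*this), so the length-2 result
 // lands in 'dest' with no copying or explicit transposition.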
1038 
1039  // M (input)
1040  // On entry, M specifies the number of rows of the matrix A.
1041  // In C/C++, pass the number of *cols* of A
1042  PetscBLASInt M = this->n();
1043 
1044  // N (input)
1045  // On entry, N specifies the number of columns of the matrix A.
1046  // In C/C++, pass the number of *rows* of A
1047  PetscBLASInt N = this->m();
1048 
1049  // ALPHA (input)
1050  // The scalar constant passed to this function
1051 
1052  // A (input) double precision array of DIMENSION ( LDA, n ).
1053  // Before entry, the leading m by n part of the array A must
1054  // contain the matrix of coefficients.
1055  // The matrix, *this. Note that _matvec_blas is called from
1056  // a const function, vector_mult(), and so we have made this function const
1057  // as well. Since BLAS knows nothing about const, we have to cast it away
1058  // now.
1059  DenseMatrix<T> & a_ref = const_cast<DenseMatrix<T> &> ( *this );
1060  std::vector<T> & a = a_ref.get_values();
1061 
1062  // LDA (input)
1063  // On entry, LDA specifies the first dimension of A as declared
1064  // in the calling (sub) program. LDA must be at least
1065  // max( 1, m ).
1066  PetscBLASInt LDA = M;
1067 
1068  // X (input) double precision array of DIMENSION at least
1069  // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
1070  // and at least
1071  // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
1072  // Before entry, the incremented array X must contain the
1073  // vector x.
1074  // Here, we must cast away the const-ness of "arg" since BLAS knows
1075  // nothing about const
1076  DenseVector<T> & x_ref = const_cast<DenseVector<T> &> ( arg );
1077  std::vector<T> & x = x_ref.get_values();
1078 
1079  // INCX (input)
1080  // On entry, INCX specifies the increment for the elements of
1081  // X. INCX must not be zero.
1082  PetscBLASInt INCX = 1;
1083 
1084  // BETA (input)
1085  // On entry, BETA specifies the scalar beta. When BETA is
1086  // supplied as zero then Y need not be set on input.
1087  // The second scalar constant passed to this function
1088 
1089  // Y (input) double precision array of DIMENSION at least
1090  // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
1091  // and at least
1092  // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
1093  // Before entry with BETA non-zero, the incremented array Y
1094  // must contain the vector y. On exit, Y is overwritten by the
1095  // updated vector y.
1096  // The input vector "dest"
1097  std::vector<T> & y = dest.get_values();
1098 
1099  // INCY (input)
1100  // On entry, INCY specifies the increment for the elements of
1101  // Y. INCY must not be zero.
1102  PetscBLASInt INCY = 1;
1103 
1104  // Finally, ready to call the BLAS function
1105  BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY);
1106 }
1107 
1108 
1109 #else
1110 
1111 
1112 template<typename T>
1113 void DenseMatrix<T>::_matvec_blas (T,
1114  T,
1115  DenseVector<T> &,
1116  const DenseVector<T> &,
1117  bool) const
1118 {
1119  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
1120 }
1121 
1122 
1123 #endif
1124 
1125 
1126 //--------------------------------------------------------------
1127 // Explicit instantiations
1128 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real> &, _BLAS_Multiply_Flag);
1129 template void DenseMatrix<Real>::_lu_decompose_lapack();
1130 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real> &,
1131  DenseVector<Real> &);
1132 template void DenseMatrix<Real>::_matvec_blas(Real,
1133  Real,
1134  DenseVector<Real> &,
1135  const DenseVector<Real> &,
1136  bool) const;
1137 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &);
1138 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &,
1139  DenseMatrix<Number> &,
1140  DenseMatrix<Number> &);
1141 template void DenseMatrix<Real>::_svd_helper (char,
1142  char,
1143  std::vector<Real> &,
1144  std::vector<Number> &,
1145  std::vector<Number> &);
1146 template void DenseMatrix<Real>::_svd_solve_lapack (const DenseVector<Real> &, DenseVector<Real> &, Real) const;
1147 template void DenseMatrix<Real>::_evd_lapack(DenseVector<Real> &,
1148  DenseVector<Real> &,
1149  DenseMatrix<Real> *,
1150  DenseMatrix<Real> *);
1151 
1152 #if !(LIBMESH_USE_REAL_NUMBERS)
1153 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number> &, _BLAS_Multiply_Flag);
1154 template void DenseMatrix<Number>::_lu_decompose_lapack();
1155 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number> &,
1156  DenseVector<Number> &);
1157 template void DenseMatrix<Number>::_matvec_blas(Number,
1158  Number,
1159  DenseVector<Number> &,
1160  const DenseVector<Number> &,
1161  bool) const;
1162 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &);
1163 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &,
1164  DenseMatrix<Number> &,
1165  DenseMatrix<Number> &);
1166 template void DenseMatrix<Number>::_svd_helper (char,
1167  char,
1168  std::vector<Real> &,
1169  std::vector<Number> &,
1170  std::vector<Number> &);
1171 template void DenseMatrix<Number>::_svd_solve_lapack (const DenseVector<Number> &, DenseVector<Number> &, Real) const;
1172 template void DenseMatrix<Number>::_evd_lapack(DenseVector<Number> &,
1173  DenseVector<Number> &,
1174  DenseMatrix<Number> *,
1175  DenseMatrix<Number> *);
1176 
1177 #endif
1178 
1179 } // namespace libMesh