--- rpl/lapack/lapack/dgejsv.f 2010/12/21 13:48:05 1.4
+++ rpl/lapack/lapack/dgejsv.f 2014/01/27 09:28:16 1.12
@@ -1,20 +1,483 @@
- SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
- & M, N, A, LDA, SVA, U, LDU, V, LDV,
- & WORK, LWORK, IWORK, INFO )
+*> \brief \b DGEJSV
+*
+* =========== DOCUMENTATION ===========
*
-* -- LAPACK routine (version 3.3.0) --
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
*
-* -- Contributed by Zlatko Drmac of the University of Zagreb and --
-* -- Kresimir Veselic of the Fernuniversitaet Hagen --
-* November 2010
+*> \htmlonly
+*> Download DGEJSV + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*> \endhtmlonly
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
+* M, N, A, LDA, SVA, U, LDU, V, LDV,
+* WORK, LWORK, IWORK, INFO )
+*
+* .. Scalar Arguments ..
+* IMPLICIT NONE
+* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
+* ..
+* .. Array Arguments ..
+* DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
+* $ WORK( LWORK )
+* INTEGER IWORK( * )
+* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
+*> matrix [A], where M >= N. The SVD of [A] is written as
+*>
+*> [A] = [U] * [SIGMA] * [V]^t,
+*>
+*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
+*> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
+*> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
+*> the singular values of [A]. The columns of [U] and [V] are the left and
+*> the right singular vectors of [A], respectively. The matrices [U] and [V]
+*> are computed and stored in the arrays U and V, respectively. The diagonal
+*> of [SIGMA] is computed and stored in the array SVA.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] JOBA
+*> \verbatim
+*> JOBA is CHARACTER*1
+*> Specifies the level of accuracy:
+*> = 'C': This option works well (high relative accuracy) if A = B * D,
+*> with well-conditioned B and arbitrary diagonal matrix D.
+*> The accuracy cannot be spoiled by COLUMN scaling. The
+*> accuracy of the computed output depends on the condition of
+*> B, and the procedure aims at the best theoretical accuracy.
+*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is
+*> bounded by f(M,N)*epsilon* cond(B), independent of D.
+*> The input matrix is preprocessed with the QRF with column
+*> pivoting. This initial preprocessing and preconditioning by
+*> a rank revealing QR factorization is common for all values of
+*> JOBA. Additional actions are specified as follows:
+*> = 'E': Computation as with 'C' with an additional estimate of the
+*> condition number of B. It provides a realistic error bound.
+*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
+*> D1, D2, and well-conditioned matrix C, this option gives
+*> higher accuracy than the 'C' option. If the structure of the
+*> input matrix is not known, and relative accuracy is
+*> desirable, then this option is advisable. The input matrix A
+*> is preprocessed with QR factorization with FULL (row and
+*> column) pivoting.
+*> = 'G' Computation as with 'F' with an additional estimate of the
+*> condition number of B, where A=D*B. If A has heavily weighted
+*> rows, then using this condition number gives too pessimistic
+*> error bound.
+*> = 'A': Small singular values are the noise and the matrix is treated
+*> as numerically rank defficient. The error in the computed
+*> singular values is bounded by f(m,n)*epsilon*||A||.
+*> The computed SVD A = U * S * V^t restores A up to
+*> f(m,n)*epsilon*||A||.
+*> This gives the procedure the licence to discard (set to zero)
+*> all singular values below N*epsilon*||A||.
+*> = 'R': Similar as in 'A'. Rank revealing property of the initial
+*> QR factorization is used do reveal (using triangular factor)
+*> a gap sigma_{r+1} < epsilon * sigma_r in which case the
+*> numerical RANK is declared to be r. The SVD is computed with
+*> absolute error bounds, but more accurately than with 'A'.
+*> \endverbatim
+*>
+*> \param[in] JOBU
+*> \verbatim
+*> JOBU is CHARACTER*1
+*> Specifies whether to compute the columns of U:
+*> = 'U': N columns of U are returned in the array U.
+*> = 'F': full set of M left sing. vectors is returned in the array U.
+*> = 'W': U may be used as workspace of length M*N. See the description
+*> of U.
+*> = 'N': U is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBV
+*> \verbatim
+*> JOBV is CHARACTER*1
+*> Specifies whether to compute the matrix V:
+*> = 'V': N columns of V are returned in the array V; Jacobi rotations
+*> are not explicitly accumulated.
+*> = 'J': N columns of V are returned in the array V, but they are
+*> computed as the product of Jacobi rotations. This option is
+*> allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
+*> = 'W': V may be used as workspace of length N*N. See the description
+*> of V.
+*> = 'N': V is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBR
+*> \verbatim
+*> JOBR is CHARACTER*1
+*> Specifies the RANGE for the singular values. Issues the licence to
+*> set to zero small positive singular values if they are outside
+*> specified range. If A .NE. 0 is scaled so that the largest singular
+*> value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
+*> the licence to kill columns of A whose norm in c*A is less than
+*> DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
+*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
+*> = 'N': Do not kill small columns of c*A. This option assumes that
+*> BLAS and QR factorizations and triangular solvers are
+*> implemented to work in that range. If the condition of A
+*> is greater than BIG, use DGESVJ.
+*> = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
+*> (roughly, as described above). This option is recommended.
+*> ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*> For computing the singular values in the FULL range [SFMIN,BIG]
+*> use DGESVJ.
+*> \endverbatim
+*>
+*> \param[in] JOBT
+*> \verbatim
+*> JOBT is CHARACTER*1
+*> If the matrix is square then the procedure may determine to use
+*> transposed A if A^t seems to be better with respect to convergence.
+*> If the matrix is not square, JOBT is ignored. This is subject to
+*> changes in the future.
+*> The decision is based on two values of entropy over the adjoint
+*> orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
+*> = 'T': transpose if entropy test indicates possibly faster
+*> convergence of Jacobi process if A^t is taken as input. If A is
+*> replaced with A^t, then the row pivoting is included automatically.
+*> = 'N': do not speculate.
+*> This option can be used to compute only the singular values, or the
+*> full SVD (U, SIGMA and V). For only one set of singular vectors
+*> (U or V), the caller should provide both U and V, as one of the
+*> matrices is used as workspace if the matrix A is transposed.
+*> The implementer can easily remove this constraint and make the
+*> code more complicated. See the descriptions of U and V.
+*> \endverbatim
+*>
+*> \param[in] JOBP
+*> \verbatim
+*> JOBP is CHARACTER*1
+*> Issues the licence to introduce structured perturbations to drown
+*> denormalized numbers. This licence should be active if the
+*> denormals are poorly implemented, causing slow computation,
+*> especially in cases of fast convergence (!). For details see [1,2].
+*> For the sake of simplicity, this perturbations are included only
+*> when the full SVD or only the singular values are requested. The
+*> implementer/user can easily add the perturbation for the cases of
+*> computing one set of singular vectors.
+*> = 'P': introduce perturbation
+*> = 'N': do not perturb
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*> M is INTEGER
+*> The number of rows of the input matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The number of columns of the input matrix A. M >= N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*> A is DOUBLE PRECISION array, dimension (LDA,N)
+*> On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] SVA
+*> \verbatim
+*> SVA is DOUBLE PRECISION array, dimension (N)
+*> On exit,
+*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
+*> computation SVA contains Euclidean column norms of the
+*> iterated matrices in the array A.
+*> - For WORK(1) .NE. WORK(2): The singular values of A are
+*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
+*> sigma_max(A) overflows or if small singular values have been
+*> saved from underflow by scaling the input matrix A.
+*> - If JOBR='R' then some of the singular values may be returned
+*> as exact zeros obtained by "set to zero" because they are
+*> below the numerical rank threshold or are denormalized numbers.
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*> U is DOUBLE PRECISION array, dimension ( LDU, N )
+*> If JOBU = 'U', then U contains on exit the M-by-N matrix of
+*> the left singular vectors.
+*> If JOBU = 'F', then U contains on exit the M-by-M matrix of
+*> the left singular vectors, including an ONB
+*> of the orthogonal complement of the Range(A).
+*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
+*> then U is used as workspace if the procedure
+*> replaces A with A^t. In that case, [V] is computed
+*> in U as left singular vectors of A^t and then
+*> copied back to the V array. This 'W' option is just
+*> a reminder to the caller that in this case U is
+*> reserved as workspace of length N*N.
+*> If JOBU = 'N' U is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*> LDU is INTEGER
+*> The leading dimension of the array U, LDU >= 1.
+*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M.
+*> \endverbatim
+*>
+*> \param[out] V
+*> \verbatim
+*> V is DOUBLE PRECISION array, dimension ( LDV, N )
+*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
+*> the right singular vectors;
+*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
+*> then V is used as workspace if the pprocedure
+*> replaces A with A^t. In that case, [U] is computed
+*> in V as right singular vectors of A^t and then
+*> copied back to the U array. This 'W' option is just
+*> a reminder to the caller that in this case V is
+*> reserved as workspace of length N*N.
+*> If JOBV = 'N' V is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*> LDV is INTEGER
+*> The leading dimension of the array V, LDV >= 1.
+*> If JOBV = 'V' or 'J' or 'W', then LDV >= N.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*> WORK is DOUBLE PRECISION array, dimension at least LWORK.
+*> On exit, if N.GT.0 .AND. M.GT.0 (else not referenced),
+*> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
+*> that SCALE*SVA(1:N) are the computed singular values
+*> of A. (See the description of SVA().)
+*> WORK(2) = See the description of WORK(1).
+*> WORK(3) = SCONDA is an estimate for the condition number of
+*> column equilibrated A. (If JOBA .EQ. 'E' or 'G')
+*> SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
+*> It is computed using DPOCON. It holds
+*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
+*> where R is the triangular factor from the QRF of A.
+*> However, if R is truncated and the numerical rank is
+*> determined to be strictly smaller than N, SCONDA is
+*> returned as -1, thus indicating that the smallest
+*> singular values might be lost.
+*>
+*> If full SVD is needed, the following two condition numbers are
+*> useful for the analysis of the algorithm. They are provied for
+*> a developer/implementer who is familiar with the details of
+*> the method.
+*>
+*> WORK(4) = an estimate of the scaled condition number of the
+*> triangular factor in the first QR factorization.
+*> WORK(5) = an estimate of the scaled condition number of the
+*> triangular factor in the second QR factorization.
+*> The following two parameters are computed if JOBT .EQ. 'T'.
+*> They are provided for a developer/implementer who is familiar
+*> with the details of the method.
+*>
+*> WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
+*> of diag(A^t*A) / Trace(A^t*A) taken as point in the
+*> probability simplex.
+*> WORK(7) = the entropy of A*A^t.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*> LWORK is INTEGER
+*> Length of WORK to confirm proper allocation of work space.
+*> LWORK depends on the job:
+*>
+*> If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
+*> -> .. no scaled condition estimate required (JOBE.EQ.'N'):
+*> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
+*> ->> For optimal performance (blocked code) the optimal value
+*> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
+*> block size for DGEQP3 and DGEQRF.
+*> In general, optimal LWORK is computed as
+*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7).
+*> -> .. an estimate of the scaled condition number of A is
+*> required (JOBA='E', 'G'). In this case, LWORK is the maximum
+*> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
+*> ->> For optimal performance (blocked code) the optimal value
+*> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
+*> In general, the optimal length LWORK is computed as
+*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
+*> N+N*N+LWORK(DPOCON),7).
+*>
+*> If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
+*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
+*> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
+*> where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ,
+*> DORMLQ. In general, the optimal length LWORK is computed as
+*> LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON),
+*> N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)).
+*>
+*> If SIGMA and the left singular vectors are needed
+*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
+*> -> For optimal performance:
+*> if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
+*> if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
+*> where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
+*> In general, the optimal length LWORK is computed as
+*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
+*> 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
+*> Here LWORK(DORMQR) equals N*NB (for JOBU.EQ.'U') or
+*> M*NB (for JOBU.EQ.'F').
+*>
+*> If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and
+*> -> if JOBV.EQ.'V'
+*> the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
+*> -> if JOBV.EQ.'J' the minimal requirement is
+*> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
+*> -> For optimal performance, LWORK should be additionally
+*> larger than N+M*NB, where NB is the optimal block size
+*> for DORMQR.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*> IWORK is INTEGER array, dimension M+3*N.
+*> On exit,
+*> IWORK(1) = the numerical rank determined after the initial
+*> QR factorization with pivoting. See the descriptions
+*> of JOBA and JOBR.
+*> IWORK(2) = the number of the computed nonzero singular values
+*> IWORK(3) = if nonzero, a warning message:
+*> If IWORK(3).EQ.1 then some of the column norms of A
+*> were denormalized floats. The requested high accuracy
+*> is not warranted by the data.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*> INFO is INTEGER
+*> < 0 : if INFO = -i, then the i-th argument had an illegal value.
+*> = 0 : successfull exit;
+*> > 0 : DGEJSV did not converge in the maximal allowed number
+*> of sweeps. The computed values may be inaccurate.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date September 2012
+*
+*> \ingroup doubleGEsing
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3,
+*> DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an
+*> additional row pivoting can be used as a preprocessor, which in some
+*> cases results in much higher accuracy. An example is matrix A with the
+*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
+*> diagonal matrices and C is well-conditioned matrix. In that case, complete
+*> pivoting in the first QR factorizations provides accuracy dependent on the
+*> condition number of C, and independent of D1, D2. Such higher accuracy is
+*> not completely understood theoretically, but it works well in practice.
+*> Further, if A can be written as A = B*D, with well-conditioned B and some
+*> diagonal D, then the high accuracy is guaranteed, both theoretically and
+*> in software, independent of D. For more details see [1], [2].
+*> The computational range for the singular values can be the full range
+*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
+*> & LAPACK routines called by DGEJSV are implemented to work in that range.
+*> If that is not the case, then the restriction for safe computation with
+*> the singular values in the range of normalized IEEE numbers is that the
+*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
+*> overflow. This code (DGEJSV) is best used in this restricted range,
+*> meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
+*> returned as zeros. See JOBR for details on this.
+*> Further, this implementation is somewhat slower than the one described
+*> in [1,2] due to replacement of some non-LAPACK components, and because
+*> the choice of some tuning parameters in the iterative part (DGESVJ) is
+*> left to the implementer on a particular machine.
+*> The rank revealing QR factorization (in this code: DGEQP3) should be
+*> implemented as in [3]. We have a new version of DGEQP3 under development
+*> that is more robust than the current one in LAPACK, with a cleaner cut in
+*> rank defficient cases. It will be available in the SIGMA library [4].
+*> If M is much larger than N, it is obvious that the inital QRF with
+*> column pivoting can be preprocessed by the QRF without pivoting. That
+*> well known trick is not used in DGEJSV because in some cases heavy row
+*> weighting can be treated with complete pivoting. The overhead in cases
+*> M much larger than N is then only due to pivoting, but the benefits in
+*> terms of accuracy have prevailed. The implementer/user can incorporate
+*> this extra QRF step easily. The implementer can also improve data movement
+*> (matrix transpose, matrix copy, matrix transposed copy) - this
+*> implementation of DGEJSV uses only the simplest, naive data movement.
+*> \endverbatim
+*
+*> \par Contributors:
+* ==================
+*>
+*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
+*
+*> \par References:
+* ================
+*>
+*> \verbatim
+*>
+*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
+*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
+*> LAPACK Working note 169.
+*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
+*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
+*> LAPACK Working note 170.
+*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
+*> factorization software - a case study.
+*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
+*> LAPACK Working note 176.
+*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
+*> QSVD, (H,K)-SVD computations.
+*> Department of Mathematics, University of Zagreb, 2008.
+*> \endverbatim
+*
+*> \par Bugs, examples and comments:
+* =================================
+*>
+*> Please report all bugs and send interesting examples and/or comments to
+*> drmac@math.hr. Thank you.
+*>
+* =====================================================================
+ SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
+ $ M, N, A, LDA, SVA, U, LDU, V, LDV,
+ $ WORK, LWORK, IWORK, INFO )
*
+* -- LAPACK computational routine (version 3.4.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-* This routine is also part of SIGMA (version 1.23, October 23. 2008.)
-* SIGMA is a library of algorithms for highly accurate algorithms for
-* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
-* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
+* September 2012
*
* .. Scalar Arguments ..
IMPLICIT NONE
@@ -22,349 +485,12 @@
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
- & WORK( LWORK )
+ $ WORK( LWORK )
INTEGER IWORK( * )
CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
* ..
*
-* Purpose
-* =======
-*
-* DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
-* matrix [A], where M >= N. The SVD of [A] is written as
-*
-* [A] = [U] * [SIGMA] * [V]^t,
-*
-* where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
-* diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
-* [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
-* the singular values of [A]. The columns of [U] and [V] are the left and
-* the right singular vectors of [A], respectively. The matrices [U] and [V]
-* are computed and stored in the arrays U and V, respectively. The diagonal
-* of [SIGMA] is computed and stored in the array SVA.
-*
-* Arguments
-* =========
-*
-* JOBA (input) CHARACTER*1
-* Specifies the level of accuracy:
-* = 'C': This option works well (high relative accuracy) if A = B * D,
-* with well-conditioned B and arbitrary diagonal matrix D.
-* The accuracy cannot be spoiled by COLUMN scaling. The
-* accuracy of the computed output depends on the condition of
-* B, and the procedure aims at the best theoretical accuracy.
-* The relative error max_{i=1:N}|d sigma_i| / sigma_i is
-* bounded by f(M,N)*epsilon* cond(B), independent of D.
-* The input matrix is preprocessed with the QRF with column
-* pivoting. This initial preprocessing and preconditioning by
-* a rank revealing QR factorization is common for all values of
-* JOBA. Additional actions are specified as follows:
-* = 'E': Computation as with 'C' with an additional estimate of the
-* condition number of B. It provides a realistic error bound.
-* = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
-* D1, D2, and well-conditioned matrix C, this option gives
-* higher accuracy than the 'C' option. If the structure of the
-* input matrix is not known, and relative accuracy is
-* desirable, then this option is advisable. The input matrix A
-* is preprocessed with QR factorization with FULL (row and
-* column) pivoting.
-* = 'G' Computation as with 'F' with an additional estimate of the
-* condition number of B, where A=D*B. If A has heavily weighted
-* rows, then using this condition number gives too pessimistic
-* error bound.
-* = 'A': Small singular values are the noise and the matrix is treated
-* as numerically rank defficient. The error in the computed
-* singular values is bounded by f(m,n)*epsilon*||A||.
-* The computed SVD A = U * S * V^t restores A up to
-* f(m,n)*epsilon*||A||.
-* This gives the procedure the licence to discard (set to zero)
-* all singular values below N*epsilon*||A||.
-* = 'R': Similar as in 'A'. Rank revealing property of the initial
-* QR factorization is used do reveal (using triangular factor)
-* a gap sigma_{r+1} < epsilon * sigma_r in which case the
-* numerical RANK is declared to be r. The SVD is computed with
-* absolute error bounds, but more accurately than with 'A'.
-*
-* JOBU (input) CHARACTER*1
-* Specifies whether to compute the columns of U:
-* = 'U': N columns of U are returned in the array U.
-* = 'F': full set of M left sing. vectors is returned in the array U.
-* = 'W': U may be used as workspace of length M*N. See the description
-* of U.
-* = 'N': U is not computed.
-*
-* JOBV (input) CHARACTER*1
-* Specifies whether to compute the matrix V:
-* = 'V': N columns of V are returned in the array V; Jacobi rotations
-* are not explicitly accumulated.
-* = 'J': N columns of V are returned in the array V, but they are
-* computed as the product of Jacobi rotations. This option is
-* allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
-* = 'W': V may be used as workspace of length N*N. See the description
-* of V.
-* = 'N': V is not computed.
-*
-* JOBR (input) CHARACTER*1
-* Specifies the RANGE for the singular values. Issues the licence to
-* set to zero small positive singular values if they are outside
-* specified range. If A .NE. 0 is scaled so that the largest singular
-* value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
-* the licence to kill columns of A whose norm in c*A is less than
-* DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
-* where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
-* = 'N': Do not kill small columns of c*A. This option assumes that
-* BLAS and QR factorizations and triangular solvers are
-* implemented to work in that range. If the condition of A
-* is greater than BIG, use DGESVJ.
-* = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
-* (roughly, as described above). This option is recommended.
-* ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* For computing the singular values in the FULL range [SFMIN,BIG]
-* use DGESVJ.
-*
-* JOBT (input) CHARACTER*1
-* If the matrix is square then the procedure may determine to use
-* transposed A if A^t seems to be better with respect to convergence.
-* If the matrix is not square, JOBT is ignored. This is subject to
-* changes in the future.
-* The decision is based on two values of entropy over the adjoint
-* orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
-* = 'T': transpose if entropy test indicates possibly faster
-* convergence of Jacobi process if A^t is taken as input. If A is
-* replaced with A^t, then the row pivoting is included automatically.
-* = 'N': do not speculate.
-* This option can be used to compute only the singular values, or the
-* full SVD (U, SIGMA and V). For only one set of singular vectors
-* (U or V), the caller should provide both U and V, as one of the
-* matrices is used as workspace if the matrix A is transposed.
-* The implementer can easily remove this constraint and make the
-* code more complicated. See the descriptions of U and V.
-*
-* JOBP (input) CHARACTER*1
-* Issues the licence to introduce structured perturbations to drown
-* denormalized numbers. This licence should be active if the
-* denormals are poorly implemented, causing slow computation,
-* especially in cases of fast convergence (!). For details see [1,2].
-* For the sake of simplicity, this perturbations are included only
-* when the full SVD or only the singular values are requested. The
-* implementer/user can easily add the perturbation for the cases of
-* computing one set of singular vectors.
-* = 'P': introduce perturbation
-* = 'N': do not perturb
-*
-* M (input) INTEGER
-* The number of rows of the input matrix A. M >= 0.
-*
-* N (input) INTEGER
-* The number of columns of the input matrix A. M >= N >= 0.
-*
-* A (input/workspace) DOUBLE PRECISION array, dimension (LDA,N)
-* On entry, the M-by-N matrix A.
-*
-* LDA (input) INTEGER
-* The leading dimension of the array A. LDA >= max(1,M).
-*
-* SVA (workspace/output) DOUBLE PRECISION array, dimension (N)
-* On exit,
-* - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
-* computation SVA contains Euclidean column norms of the
-* iterated matrices in the array A.
-* - For WORK(1) .NE. WORK(2): The singular values of A are
-* (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
-* sigma_max(A) overflows or if small singular values have been
-* saved from underflow by scaling the input matrix A.
-* - If JOBR='R' then some of the singular values may be returned
-* as exact zeros obtained by "set to zero" because they are
-* below the numerical rank threshold or are denormalized numbers.
-*
-* U (workspace/output) DOUBLE PRECISION array, dimension ( LDU, N )
-* If JOBU = 'U', then U contains on exit the M-by-N matrix of
-* the left singular vectors.
-* If JOBU = 'F', then U contains on exit the M-by-M matrix of
-* the left singular vectors, including an ONB
-* of the orthogonal complement of the Range(A).
-* If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
-* then U is used as workspace if the procedure
-* replaces A with A^t. In that case, [V] is computed
-* in U as left singular vectors of A^t and then
-* copied back to the V array. This 'W' option is just
-* a reminder to the caller that in this case U is
-* reserved as workspace of length N*N.
-* If JOBU = 'N' U is not referenced.
-*
-* LDU (input) INTEGER
-* The leading dimension of the array U, LDU >= 1.
-* IF JOBU = 'U' or 'F' or 'W', then LDU >= M.
-*
-* V (workspace/output) DOUBLE PRECISION array, dimension ( LDV, N )
-* If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
-* the right singular vectors;
-* If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
-* then V is used as workspace if the pprocedure
-* replaces A with A^t. In that case, [U] is computed
-* in V as right singular vectors of A^t and then
-* copied back to the U array. This 'W' option is just
-* a reminder to the caller that in this case V is
-* reserved as workspace of length N*N.
-* If JOBV = 'N' V is not referenced.
-*
-* LDV (input) INTEGER
-* The leading dimension of the array V, LDV >= 1.
-* If JOBV = 'V' or 'J' or 'W', then LDV >= N.
-*
-* WORK (workspace/output) DOUBLE PRECISION array, dimension at least LWORK.
-* On exit,
-* WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
-* that SCALE*SVA(1:N) are the computed singular values
-* of A. (See the description of SVA().)
-* WORK(2) = See the description of WORK(1).
-* WORK(3) = SCONDA is an estimate for the condition number of
-* column equilibrated A. (If JOBA .EQ. 'E' or 'G')
-* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
-* It is computed using DPOCON. It holds
-* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
-* where R is the triangular factor from the QRF of A.
-* However, if R is truncated and the numerical rank is
-* determined to be strictly smaller than N, SCONDA is
-* returned as -1, thus indicating that the smallest
-* singular values might be lost.
-*
-* If full SVD is needed, the following two condition numbers are
-* useful for the analysis of the algorithm. They are provied for
-* a developer/implementer who is familiar with the details of
-* the method.
-*
-* WORK(4) = an estimate of the scaled condition number of the
-* triangular factor in the first QR factorization.
-* WORK(5) = an estimate of the scaled condition number of the
-* triangular factor in the second QR factorization.
-* The following two parameters are computed if JOBT .EQ. 'T'.
-* They are provided for a developer/implementer who is familiar
-* with the details of the method.
-*
-* WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
-* of diag(A^t*A) / Trace(A^t*A) taken as point in the
-* probability simplex.
-* WORK(7) = the entropy of A*A^t.
-*
-* LWORK (input) INTEGER
-* Length of WORK to confirm proper allocation of work space.
-* LWORK depends on the job:
-*
-* If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
-* -> .. no scaled condition estimate required ( JOBE.EQ.'N'):
-* LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
-* For optimal performance (blocked code) the optimal value
-* is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
-* block size for xGEQP3/xGEQRF.
-* -> .. an estimate of the scaled condition number of A is
-* required (JOBA='E', 'G'). In this case, LWORK is the maximum
-* of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4N,7).
-*
-* If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
-* -> the minimal requirement is LWORK >= max(2*N+M,7).
-* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7),
-* where NB is the optimal block size.
-*
-* If SIGMA and the left singular vectors are needed
-* -> the minimal requirement is LWORK >= max(2*N+M,7).
-* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7),
-* where NB is the optimal block size.
-*
-* If full SVD is needed ( JOBU.EQ.'U' or 'F', JOBV.EQ.'V' ) and
-* -> .. the singular vectors are computed without explicit
-* accumulation of the Jacobi rotations, LWORK >= 6*N+2*N*N
-* -> .. in the iterative part, the Jacobi rotations are
-* explicitly accumulated (option, see the description of JOBV),
-* then the minimal requirement is LWORK >= max(M+3*N+N*N,7).
-* For better performance, if NB is the optimal block size,
-* LWORK >= max(3*N+N*N+M,3*N+N*N+N*NB,7).
-*
-* IWORK (workspace/output) INTEGER array, dimension M+3*N.
-* On exit,
-* IWORK(1) = the numerical rank determined after the initial
-* QR factorization with pivoting. See the descriptions
-* of JOBA and JOBR.
-* IWORK(2) = the number of the computed nonzero singular values
-* IWORK(3) = if nonzero, a warning message:
-* If IWORK(3).EQ.1 then some of the column norms of A
-* were denormalized floats. The requested high accuracy
-* is not warranted by the data.
-*
-* INFO (output) INTEGER
-* < 0 : if INFO = -i, then the i-th argument had an illegal value.
-* = 0 : successfull exit;
-* > 0 : DGEJSV did not converge in the maximal allowed number
-* of sweeps. The computed values may be inaccurate.
-*
-* Further Details
-* ===============
-*
-* DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3,
-* SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an
-* additional row pivoting can be used as a preprocessor, which in some
-* cases results in much higher accuracy. An example is matrix A with the
-* structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
-* diagonal matrices and C is well-conditioned matrix. In that case, complete
-* pivoting in the first QR factorizations provides accuracy dependent on the
-* condition number of C, and independent of D1, D2. Such higher accuracy is
-* not completely understood theoretically, but it works well in practice.
-* Further, if A can be written as A = B*D, with well-conditioned B and some
-* diagonal D, then the high accuracy is guaranteed, both theoretically and
-* in software, independent of D. For more details see [1], [2].
-* The computational range for the singular values can be the full range
-* ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
-* & LAPACK routines called by DGEJSV are implemented to work in that range.
-* If that is not the case, then the restriction for safe computation with
-* the singular values in the range of normalized IEEE numbers is that the
-* spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
-* overflow. This code (DGEJSV) is best used in this restricted range,
-* meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
-* returned as zeros. See JOBR for details on this.
-* Further, this implementation is somewhat slower than the one described
-* in [1,2] due to replacement of some non-LAPACK components, and because
-* the choice of some tuning parameters in the iterative part (DGESVJ) is
-* left to the implementer on a particular machine.
-* The rank revealing QR factorization (in this code: SGEQP3) should be
-* implemented as in [3]. We have a new version of SGEQP3 under development
-* that is more robust than the current one in LAPACK, with a cleaner cut in
-* rank defficient cases. It will be available in the SIGMA library [4].
-* If M is much larger than N, it is obvious that the inital QRF with
-* column pivoting can be preprocessed by the QRF without pivoting. That
-* well known trick is not used in DGEJSV because in some cases heavy row
-* weighting can be treated with complete pivoting. The overhead in cases
-* M much larger than N is then only due to pivoting, but the benefits in
-* terms of accuracy have prevailed. The implementer/user can incorporate
-* this extra QRF step easily. The implementer can also improve data movement
-* (matrix transpose, matrix copy, matrix transposed copy) - this
-* implementation of DGEJSV uses only the simplest, naive data movement.
-*
-* Contributors
-*
-* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
-*
-* References
-*
-* [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
-* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
-* LAPACK Working note 169.
-* [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
-* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
-* LAPACK Working note 170.
-* [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
-* factorization software - a case study.
-* ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
-* LAPACK Working note 176.
-* [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
-* QSVD, (H,K)-SVD computations.
-* Department of Mathematics, University of Zagreb, 2008.
-*
-* Bugs, examples and comments
-*
-* Please report all bugs and send interesting examples and/or comments to
-* drmac@math.hr. Thank you.
-*
-* ==========================================================================
+* ===========================================================================
*
* .. Local Parameters ..
DOUBLE PRECISION ZERO, ONE
@@ -372,16 +498,16 @@
* ..
* .. Local Scalars ..
DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
- & CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
- & SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
+ $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
+ $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC,
- & L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
- & NOSCAL, ROWPIV, RSVEC, TRANSP
+ $ L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
+ $ NOSCAL, ROWPIV, RSVEC, TRANSP
* ..
* .. Intrinsic Functions ..
INTRINSIC DABS, DLOG, DMAX1, DMIN1, DBLE,
- & MAX0, MIN0, IDNINT, DSIGN, DSQRT
+ $ MAX0, MIN0, IDNINT, DSIGN, DSQRT
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH, DNRM2
@@ -391,8 +517,8 @@
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGELQF, DGEQP3, DGEQRF, DLACPY, DLASCL,
- & DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ,
- & DORMQR, DPOCON, DSCAL, DSWAP, DTRSM, XERBLA
+ $ DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ,
+ $ DORMQR, DPOCON, DSCAL, DSWAP, DTRSM, XERBLA
*
EXTERNAL DGESVJ
* ..
@@ -412,13 +538,13 @@
L2PERT = LSAME( JOBP, 'P' )
*
IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR.
- & ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
+ $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
INFO = - 1
ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR.
- & LSAME( JOBU, 'W' )) ) THEN
+ $ LSAME( JOBU, 'W' )) ) THEN
INFO = - 2
ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR.
- & LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
+ $ LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
INFO = - 3
ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN
INFO = - 4
@@ -438,12 +564,16 @@
INFO = - 14
ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND.
& (LWORK .LT. MAX0(7,4*N+1,2*M+N))) .OR.
- & (.NOT.(LSVEC .OR. LSVEC) .AND. ERREST .AND.
+ & (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND.
& (LWORK .LT. MAX0(7,4*N+N*N,2*M+N))) .OR.
- & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX0(7,2*N+M))) .OR.
- & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX0(7,2*N+M))) .OR.
- & (LSVEC .AND. RSVEC .AND. .NOT.JRACC .AND. (LWORK.LT.6*N+2*N*N))
- & .OR. (LSVEC.AND.RSVEC.AND.JRACC.AND.LWORK.LT.MAX0(7,M+3*N+N*N)))
+ & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX0(7,2*M+N,4*N+1)))
+ & .OR.
+ & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX0(7,2*M+N,4*N+1)))
+ & .OR.
+ & (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND.
+ & (LWORK.LT.MAX0(2*M+N,6*N+2*N*N)))
+ & .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND.
+ & LWORK.LT.MAX0(2*M+N,4*N+N*N,2*N+N*N+6)))
& THEN
INFO = - 17
ELSE
@@ -454,6 +584,7 @@
IF ( INFO .NE. 0 ) THEN
* #:(
CALL XERBLA( 'DGEJSV', - INFO )
+ RETURN
END IF
*
* Quick return for void matrix (Y3K safe)
@@ -471,7 +602,6 @@
*
*! NOTE: Make sure DLAMCH() does not fail on the target architecture.
*
-
EPSLN = DLAMCH('Epsilon')
SFMIN = DLAMCH('SafeMinimum')
SMALL = SFMIN / EPSLN
@@ -536,6 +666,7 @@
END IF
IWORK(1) = 0
IWORK(2) = 0
+ IWORK(3) = 0
RETURN
END IF
*
@@ -834,8 +965,8 @@
TEMP1 = DSQRT(SFMIN)
DO 3401 p = 2, N
IF ( ( DABS(A(p,p)) .LT. (EPSLN*DABS(A(p-1,p-1))) ) .OR.
- & ( DABS(A(p,p)) .LT. SMALL ) .OR.
- & ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
+ $ ( DABS(A(p,p)) .LT. SMALL ) .OR.
+ $ ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
NR = NR + 1
3401 CONTINUE
3402 CONTINUE
@@ -851,7 +982,7 @@
TEMP1 = DSQRT(SFMIN)
DO 3301 p = 2, N
IF ( ( DABS(A(p,p)) .LT. SMALL ) .OR.
- & ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
+ $ ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
NR = NR + 1
3301 CONTINUE
3302 CONTINUE
@@ -883,7 +1014,7 @@
CALL DSCAL( p, ONE/TEMP1, V(1,p), 1 )
3053 CONTINUE
CALL DPOCON( 'U', N, V, LDV, ONE, TEMP1,
- & WORK(N+1), IWORK(2*N+M+1), IERR )
+ $ WORK(N+1), IWORK(2*N+M+1), IERR )
ELSE IF ( LSVEC ) THEN
* .. U is available as workspace
CALL DLACPY( 'U', N, N, A, LDA, U, LDU )
@@ -892,7 +1023,7 @@
CALL DSCAL( p, ONE/TEMP1, U(1,p), 1 )
3054 CONTINUE
CALL DPOCON( 'U', N, U, LDU, ONE, TEMP1,
- & WORK(N+1), IWORK(2*N+M+1), IERR )
+ $ WORK(N+1), IWORK(2*N+M+1), IERR )
ELSE
CALL DLACPY( 'U', N, N, A, LDA, WORK(N+1), N )
DO 3052 p = 1, N
@@ -901,7 +1032,7 @@
3052 CONTINUE
* .. the columns of R are scaled to have unit Euclidean lengths.
CALL DPOCON( 'U', N, WORK(N+1), N, ONE, TEMP1,
- & WORK(N+N*N+1), IWORK(2*N+M+1), IERR )
+ $ WORK(N+N*N+1), IWORK(2*N+M+1), IERR )
END IF
SCONDA = ONE / DSQRT(TEMP1)
* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
@@ -916,7 +1047,6 @@
*
* Phase 3:
*
-
IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
*
* Singular Values only
@@ -947,8 +1077,8 @@
TEMP1 = XSC*DABS(A(q,q))
DO 4949 p = 1, N
IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
- & .OR. ( p .LT. q ) )
- & A(p,q) = DSIGN( TEMP1, A(p,q) )
+ $ .OR. ( p .LT. q ) )
+ $ A(p,q) = DSIGN( TEMP1, A(p,q) )
4949 CONTINUE
4947 CONTINUE
ELSE
@@ -977,8 +1107,8 @@
TEMP1 = XSC*DABS(A(q,q))
DO 1949 p = 1, NR
IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
- & .OR. ( p .LT. q ) )
- & A(p,q) = DSIGN( TEMP1, A(p,q) )
+ $ .OR. ( p .LT. q ) )
+ $ A(p,q) = DSIGN( TEMP1, A(p,q) )
1949 CONTINUE
1947 CONTINUE
ELSE
@@ -990,7 +1120,7 @@
* the part which destroys triangular form (confusing?!))
*
CALL DGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA,
- & N, V, LDV, WORK, LWORK, INFO )
+ $ N, V, LDV, WORK, LWORK, INFO )
*
SCALEM = WORK(1)
NUMRANK = IDNINT(WORK(2))
@@ -1009,7 +1139,7 @@
CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
*
CALL DGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA,
- & WORK, LWORK, INFO )
+ $ WORK, LWORK, INFO )
SCALEM = WORK(1)
NUMRANK = IDNINT(WORK(2))
@@ -1023,14 +1153,14 @@
CALL DLACPY( 'Lower', NR, NR, A, LDA, V, LDV )
CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
CALL DGEQRF( NR, NR, V, LDV, WORK(N+1), WORK(2*N+1),
- & LWORK-2*N, IERR )
+ $ LWORK-2*N, IERR )
DO 8998 p = 1, NR
CALL DCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 )
8998 CONTINUE
CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
*
CALL DGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U,
- & LDU, WORK(N+1), LWORK, INFO )
+ $ LDU, WORK(N+1), LWORK, INFO )
SCALEM = WORK(N+1)
NUMRANK = IDNINT(WORK(N+2))
IF ( NR .LT. N ) THEN
@@ -1040,7 +1170,7 @@
END IF
*
CALL DORMLQ( 'Left', 'Transpose', N, N, NR, A, LDA, WORK,
- & V, LDV, WORK(N+1), LWORK-N, IERR )
+ $ V, LDV, WORK(N+1), LWORK-N, IERR )
*
END IF
*
@@ -1065,7 +1195,7 @@
CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
*
CALL DGEQRF( N, NR, U, LDU, WORK(N+1), WORK(2*N+1),
- & LWORK-2*N, IERR )
+ $ LWORK-2*N, IERR )
*
DO 1967 p = 1, NR - 1
CALL DCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 )
@@ -1073,7 +1203,7 @@
CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
*
CALL DGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A,
- & LDA, WORK(N+1), LWORK-N, INFO )
+ $ LDA, WORK(N+1), LWORK-N, INFO )
SCALEM = WORK(N+1)
NUMRANK = IDNINT(WORK(N+2))
*
@@ -1086,10 +1216,10 @@
END IF
*
CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
- & LDU, WORK(N+1), LWORK-N, IERR )
+ $ LDU, WORK(N+1), LWORK-N, IERR )
*
IF ( ROWPIV )
- & CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+ $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
DO 1974 p = 1, N1
XSC = ONE / DNRM2( M, U(1,p), 1 )
@@ -1137,9 +1267,9 @@
TEMP1 = XSC*DABS( V(q,q) )
DO 2968 p = 1, N
IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
- & .OR. ( p .LT. q ) )
- & V(p,q) = DSIGN( TEMP1, V(p,q) )
- IF ( p. LT. q ) V(p,q) = - V(p,q)
+ $ .OR. ( p .LT. q ) )
+ $ V(p,q) = DSIGN( TEMP1, V(p,q) )
+ IF ( p .LT. q ) V(p,q) = - V(p,q)
2968 CONTINUE
2969 CONTINUE
ELSE
@@ -1156,7 +1286,7 @@
CALL DSCAL(NR-p+1,ONE/TEMP1,WORK(2*N+(p-1)*NR+p),1)
3950 CONTINUE
CALL DPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1,
- & WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
+ $ WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
CONDR1 = ONE / DSQRT(TEMP1)
* .. here need a second oppinion on the condition number
* .. then assume worst case scenario
@@ -1172,7 +1302,7 @@
* of a lower triangular matrix.
* R1^t = Q2 * R2
CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
- & LWORK-2*N, IERR )
+ $ LWORK-2*N, IERR )
*
IF ( L2PERT ) THEN
XSC = DSQRT(SMALL)/EPSLN
@@ -1180,14 +1310,14 @@
DO 3958 q = 1, p - 1
TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
IF ( DABS(V(q,p)) .LE. TEMP1 )
- & V(q,p) = DSIGN( TEMP1, V(q,p) )
+ $ V(q,p) = DSIGN( TEMP1, V(q,p) )
3958 CONTINUE
3959 CONTINUE
END IF
*
IF ( NR .NE. N )
+ $ CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
* .. save ...
- & CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
*
* .. this transposed copy should be better than naive
DO 1969 p = 1, NR - 1
@@ -1210,16 +1340,16 @@
IWORK(N+p) = 0
3003 CONTINUE
CALL DGEQP3( N, NR, V, LDV, IWORK(N+1), WORK(N+1),
- & WORK(2*N+1), LWORK-2*N, IERR )
+ $ WORK(2*N+1), LWORK-2*N, IERR )
** CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
-** & LWORK-2*N, IERR )
+** $ LWORK-2*N, IERR )
IF ( L2PERT ) THEN
XSC = DSQRT(SMALL)
DO 3969 p = 2, NR
DO 3968 q = 1, p - 1
TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
IF ( DABS(V(q,p)) .LE. TEMP1 )
- & V(q,p) = DSIGN( TEMP1, V(q,p) )
+ $ V(q,p) = DSIGN( TEMP1, V(q,p) )
3968 CONTINUE
3969 CONTINUE
END IF
@@ -1239,7 +1369,7 @@
END IF
* Now, compute R2 = L3 * Q3, the LQ factorization.
CALL DGELQF( NR, NR, V, LDV, WORK(2*N+N*NR+1),
- & WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
+ $ WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
* .. and estimate the condition number
CALL DLACPY( 'L',NR,NR,V,LDV,WORK(2*N+N*NR+NR+1),NR )
DO 4950 p = 1, NR
@@ -1247,7 +1377,7 @@
CALL DSCAL( p, ONE/TEMP1, WORK(2*N+N*NR+NR+p), NR )
4950 CONTINUE
CALL DPOCON( 'L',NR,WORK(2*N+N*NR+NR+1),NR,ONE,TEMP1,
- & WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR )
+ $ WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR )
CONDR2 = ONE / DSQRT(TEMP1)
*
IF ( CONDR2 .GE. COND_OK ) THEN
@@ -1284,7 +1414,7 @@
IF ( CONDR1 .LT. COND_OK ) THEN
*
CALL DGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U,
- & LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO )
+ $ LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO )
SCALEM = WORK(2*N+N*NR+NR+1)
NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
DO 3970 p = 1, NR
@@ -1294,7 +1424,7 @@
* .. pick the right matrix equation and solve it
*
- IF ( NR. EQ. N ) THEN
+ IF ( NR .EQ. N ) THEN
* :)) .. best case, R1 is inverted. The solution of this matrix
* equation is Q2*V2 = the product of the Jacobi rotations
* used in DGESVJ, premultiplied with the orthogonal matrix
@@ -1306,14 +1436,14 @@
* used in DGESVJ. The Q-factor from the second QR
* factorization is then built in explicitly.
CALL DTRSM('L','U','T','N',NR,NR,ONE,WORK(2*N+1),
- & N,V,LDV)
+ $ N,V,LDV)
IF ( NR .LT. N ) THEN
CALL DLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV)
CALL DLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
CALL DLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV)
END IF
CALL DORMQR('L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
- & V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
+ $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
END IF
*
ELSE IF ( CONDR2 .LT. COND_OK ) THEN
@@ -1325,7 +1455,7 @@
* the lower triangular L3 from the LQ factorization of
* R2=L3*Q3), pre-multiplied with the transposed Q3.
CALL DGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U,
- & LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
+ $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
SCALEM = WORK(2*N+N*NR+NR+1)
NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
DO 3870 p = 1, NR
@@ -1348,7 +1478,7 @@
CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
END IF
CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
- & V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+ $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
ELSE
* Last line of defense.
* #:( This is a rather pathological case: no scaled condition
@@ -1362,7 +1492,7 @@
* Compute the full SVD of L3 using DGESVJ with explicit
* accumulation of Jacobi rotations.
CALL DGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U,
- & LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
+ $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
SCALEM = WORK(2*N+N*NR+NR+1)
NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
IF ( NR .LT. N ) THEN
@@ -1371,11 +1501,11 @@
CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
END IF
CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
- & V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+ $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
*
CALL DORMLQ( 'L', 'T', NR, NR, NR, WORK(2*N+1), N,
- & WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1),
- & LWORK-2*N-N*NR-NR, IERR )
+ $ WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1),
+ $ LWORK-2*N-N*NR-NR, IERR )
DO 773 q = 1, NR
DO 772 p = 1, NR
WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
@@ -1401,7 +1531,7 @@
973 CONTINUE
XSC = ONE / DNRM2( N, V(1,q), 1 )
IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
- & CALL DSCAL( N, XSC, V(1,q), 1 )
+ $ CALL DSCAL( N, XSC, V(1,q), 1 )
1972 CONTINUE
* At this moment, V contains the right singular vectors of A.
* Next, assemble the left singular vector matrix U (M x N).
@@ -1417,21 +1547,21 @@
* matrix U. This applies to all cases.
*
CALL DORMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, WORK, U,
- & LDU, WORK(N+1), LWORK-N, IERR )
+ $ LDU, WORK(N+1), LWORK-N, IERR )
* The columns of U are normalized. The cost is O(M*N) flops.
TEMP1 = DSQRT(DBLE(M)) * EPSLN
DO 1973 p = 1, NR
XSC = ONE / DNRM2( M, U(1,p), 1 )
IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
- & CALL DSCAL( M, XSC, U(1,p), 1 )
+ $ CALL DSCAL( M, XSC, U(1,p), 1 )
1973 CONTINUE
*
* If the initial QRF is computed with row pivoting, the left
* singular vectors must be adjusted.
*
IF ( ROWPIV )
- & CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+ $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
ELSE
*
@@ -1452,7 +1582,7 @@
END IF
*
CALL DGESVJ( 'Upper', 'U', 'N', N, N, WORK(N+1), N, SVA,
- & N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO )
+ $ N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO )
*
SCALEM = WORK(N+N*N+1)
NUMRANK = IDNINT(WORK(N+N*N+2))
@@ -1462,7 +1592,7 @@
6970 CONTINUE
*
CALL DTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N,
- & ONE, A, LDA, WORK(N+1), N )
+ $ ONE, A, LDA, WORK(N+1), N )
DO 6972 p = 1, N
CALL DCOPY( N, WORK(N+p), N, V(IWORK(p),1), LDV )
6972 CONTINUE
@@ -1470,7 +1600,7 @@
DO 6971 p = 1, N
XSC = ONE / DNRM2( N, V(1,p), 1 )
IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
- & CALL DSCAL( N, XSC, V(1,p), 1 )
+ $ CALL DSCAL( N, XSC, V(1,p), 1 )
6971 CONTINUE
*
* Assemble the left singular vector matrix U (M x N).
@@ -1483,16 +1613,16 @@
END IF
END IF
CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
- & LDU, WORK(N+1), LWORK-N, IERR )
+ $ LDU, WORK(N+1), LWORK-N, IERR )
TEMP1 = DSQRT(DBLE(M))*EPSLN
DO 6973 p = 1, N1
XSC = ONE / DNRM2( M, U(1,p), 1 )
IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
- & CALL DSCAL( M, XSC, U(1,p), 1 )
+ $ CALL DSCAL( M, XSC, U(1,p), 1 )
6973 CONTINUE
*
IF ( ROWPIV )
- & CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+ $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
END IF
*
@@ -1520,9 +1650,9 @@
TEMP1 = XSC*DABS( V(q,q) )
DO 5968 p = 1, N
IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
- & .OR. ( p .LT. q ) )
- & V(p,q) = DSIGN( TEMP1, V(p,q) )
- IF ( p. LT. q ) V(p,q) = - V(p,q)
+ $ .OR. ( p .LT. q ) )
+ $ V(p,q) = DSIGN( TEMP1, V(p,q) )
+ IF ( p .LT. q ) V(p,q) = - V(p,q)
5968 CONTINUE
5969 CONTINUE
ELSE
@@ -1530,7 +1660,7 @@
END IF
CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
- & LWORK-2*N, IERR )
+ $ LWORK-2*N, IERR )
CALL DLACPY( 'L', N, NR, V, LDV, WORK(2*N+1), N )
*
DO 7969 p = 1, NR
@@ -1550,7 +1680,7 @@
END IF
CALL DGESVJ( 'G', 'U', 'V', NR, NR, U, LDU, SVA,
- & N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO )
+ $ N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO )
SCALEM = WORK(2*N+N*NR+1)
NUMRANK = IDNINT(WORK(2*N+N*NR+2))
@@ -1561,7 +1691,7 @@
END IF
CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
- & V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+ $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
*
* Permute the rows of V using the (column) permutation from the
* first QRF. Also, scale the columns to make them unit in
@@ -1577,7 +1707,7 @@
8973 CONTINUE
XSC = ONE / DNRM2( N, V(1,q), 1 )
IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
- & CALL DSCAL( N, XSC, V(1,q), 1 )
+ $ CALL DSCAL( N, XSC, V(1,q), 1 )
7972 CONTINUE
*
* At this moment, V contains the right singular vectors of A.
@@ -1592,10 +1722,10 @@
END IF
*
CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
- & LDU, WORK(N+1), LWORK-N, IERR )
+ $ LDU, WORK(N+1), LWORK-N, IERR )
*
IF ( ROWPIV )
- & CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+ $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
*
END IF