--- rpl/lapack/lapack/dgejsv.f	2010/12/21 13:48:05	1.4
+++ rpl/lapack/lapack/dgejsv.f	2020/05/21 21:45:56	1.20
@@ -1,20 +1,485 @@
-      SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
-     &                   M, N, A, LDA, SVA, U, LDU, V, LDV,
-     &                   WORK, LWORK, IWORK, INFO )
+*> \brief \b DGEJSV
+*
+*  =========== DOCUMENTATION ===========
 *
-*  -- LAPACK routine (version 3.3.0)                                    --
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
-*  -- Contributed by Zlatko Drmac of the University of Zagreb and     --
-*  -- Kresimir Veselic of the Fernuniversitaet Hagen                  --
-*     November 2010
+*> \htmlonly
+*> Download DGEJSV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgejsv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgejsv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgejsv.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
+*                          M, N, A, LDA, SVA, U, LDU, V, LDV,
+*                          WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       IMPLICIT    NONE
+*       INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
+*      $            WORK( LWORK )
+*       INTEGER     IWORK( * )
+*       CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
+*> matrix [A], where M >= N. The SVD of [A] is written as
+*>
+*>              [A] = [U] * [SIGMA] * [V]^t,
+*>
+*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
+*> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
+*> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
+*> the singular values of [A]. The columns of [U] and [V] are the left and
+*> the right singular vectors of [A], respectively. The matrices [U] and [V]
+*> are computed and stored in the arrays U and V, respectively. The diagonal
+*> of [SIGMA] is computed and stored in the array SVA.
+*> DGEJSV can sometimes compute tiny singular values and their singular vectors much
+*> more accurately than other SVD routines, see below under Further Details.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBA
+*> \verbatim
+*>          JOBA is CHARACTER*1
+*>        Specifies the level of accuracy:
+*>       = 'C': This option works well (high relative accuracy) if A = B * D,
+*>             with well-conditioned B and arbitrary diagonal matrix D.
+*>             The accuracy cannot be spoiled by COLUMN scaling. The
+*>             accuracy of the computed output depends on the condition of
+*>             B, and the procedure aims at the best theoretical accuracy.
+*>             The relative error max_{i=1:N}|d sigma_i| / sigma_i is
+*>             bounded by f(M,N)*epsilon* cond(B), independent of D.
+*>             The input matrix is preprocessed with the QRF with column
+*>             pivoting. This initial preprocessing and preconditioning by
+*>             a rank revealing QR factorization is common for all values of
+*>             JOBA. Additional actions are specified as follows:
+*>       = 'E': Computation as with 'C' with an additional estimate of the
+*>             condition number of B. It provides a realistic error bound.
+*>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
+*>             D1, D2, and well-conditioned matrix C, this option gives
+*>             higher accuracy than the 'C' option. If the structure of the
+*>             input matrix is not known, and relative accuracy is
+*>             desirable, then this option is advisable. The input matrix A
+*>             is preprocessed with QR factorization with FULL (row and
+*>             column) pivoting.
+*>       = 'G': Computation as with 'F' with an additional estimate of the
+*>             condition number of B, where A=D*B. If A has heavily weighted
+*>             rows, then using this condition number gives too pessimistic
+*>             error bound.
+*>       = 'A': Small singular values are the noise and the matrix is treated
+*>             as numerically rank deficient. The error in the computed
+*>             singular values is bounded by f(m,n)*epsilon*||A||.
+*>             The computed SVD A = U * S * V^t restores A up to
+*>             f(m,n)*epsilon*||A||.
+*>             This gives the procedure the licence to discard (set to zero)
+*>             all singular values below N*epsilon*||A||.
+*>       = 'R': Similar as in 'A'. Rank revealing property of the initial
+*>             QR factorization is used do reveal (using triangular factor)
+*>             a gap sigma_{r+1} < epsilon * sigma_r in which case the
+*>             numerical RANK is declared to be r. The SVD is computed with
+*>             absolute error bounds, but more accurately than with 'A'.
+*> \endverbatim
+*>
+*> \param[in] JOBU
+*> \verbatim
+*>          JOBU is CHARACTER*1
+*>        Specifies whether to compute the columns of U:
+*>       = 'U': N columns of U are returned in the array U.
+*>       = 'F': full set of M left sing. vectors is returned in the array U.
+*>       = 'W': U may be used as workspace of length M*N. See the description
+*>             of U.
+*>       = 'N': U is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBV
+*> \verbatim
+*>          JOBV is CHARACTER*1
+*>        Specifies whether to compute the matrix V:
+*>       = 'V': N columns of V are returned in the array V; Jacobi rotations
+*>             are not explicitly accumulated.
+*>       = 'J': N columns of V are returned in the array V, but they are
+*>             computed as the product of Jacobi rotations. This option is
+*>             allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
+*>       = 'W': V may be used as workspace of length N*N. See the description
+*>             of V.
+*>       = 'N': V is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBR
+*> \verbatim
+*>          JOBR is CHARACTER*1
+*>        Specifies the RANGE for the singular values. Issues the licence to
+*>        set to zero small positive singular values if they are outside
+*>        specified range. If A .NE. 0 is scaled so that the largest singular
+*>        value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
+*>        the licence to kill columns of A whose norm in c*A is less than
+*>        DSQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
+*>        where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
+*>       = 'N': Do not kill small columns of c*A. This option assumes that
+*>             BLAS and QR factorizations and triangular solvers are
+*>             implemented to work in that range. If the condition of A
+*>             is greater than BIG, use DGESVJ.
+*>       = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
+*>             (roughly, as described above). This option is recommended.
+*>                                            ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*>        For computing the singular values in the FULL range [SFMIN,BIG]
+*>        use DGESVJ.
+*> \endverbatim
+*>
+*> \param[in] JOBT
+*> \verbatim
+*>          JOBT is CHARACTER*1
+*>        If the matrix is square then the procedure may determine to use
+*>        transposed A if A^t seems to be better with respect to convergence.
+*>        If the matrix is not square, JOBT is ignored. This is subject to
+*>        changes in the future.
+*>        The decision is based on two values of entropy over the adjoint
+*>        orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
+*>       = 'T': transpose if entropy test indicates possibly faster
+*>        convergence of Jacobi process if A^t is taken as input. If A is
+*>        replaced with A^t, then the row pivoting is included automatically.
+*>       = 'N': do not speculate.
+*>        This option can be used to compute only the singular values, or the
+*>        full SVD (U, SIGMA and V). For only one set of singular vectors
+*>        (U or V), the caller should provide both U and V, as one of the
+*>        matrices is used as workspace if the matrix A is transposed.
+*>        The implementer can easily remove this constraint and make the
+*>        code more complicated. See the descriptions of U and V.
+*> \endverbatim
+*>
+*> \param[in] JOBP
+*> \verbatim
+*>          JOBP is CHARACTER*1
+*>        Issues the licence to introduce structured perturbations to drown
+*>        denormalized numbers. This licence should be active if the
+*>        denormals are poorly implemented, causing slow computation,
+*>        especially in cases of fast convergence (!). For details see [1,2].
+*>        For the sake of simplicity, this perturbations are included only
+*>        when the full SVD or only the singular values are requested. The
+*>        implementer/user can easily add the perturbation for the cases of
+*>        computing one set of singular vectors.
+*>       = 'P': introduce perturbation
+*>       = 'N': do not perturb
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>         The number of rows of the input matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The number of columns of the input matrix A. M >= N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] SVA
+*> \verbatim
+*>          SVA is DOUBLE PRECISION array, dimension (N)
+*>          On exit,
+*>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
+*>            computation SVA contains Euclidean column norms of the
+*>            iterated matrices in the array A.
+*>          - For WORK(1) .NE. WORK(2): The singular values of A are
+*>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
+*>            sigma_max(A) overflows or if small singular values have been
+*>            saved from underflow by scaling the input matrix A.
+*>          - If JOBR='R' then some of the singular values may be returned
+*>            as exact zeros obtained by "set to zero" because they are
+*>            below the numerical rank threshold or are denormalized numbers.
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension ( LDU, N )
+*>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
+*>                         the left singular vectors.
+*>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
+*>                         the left singular vectors, including an ONB
+*>                         of the orthogonal complement of the Range(A).
+*>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
+*>                         then U is used as workspace if the procedure
+*>                         replaces A with A^t. In that case, [V] is computed
+*>                         in U as left singular vectors of A^t and then
+*>                         copied back to the V array. This 'W' option is just
+*>                         a reminder to the caller that in this case U is
+*>                         reserved as workspace of length N*N.
+*>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER
+*>          The leading dimension of the array U,  LDU >= 1.
+*>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
+*> \endverbatim
+*>
+*> \param[out] V
+*> \verbatim
+*>          V is DOUBLE PRECISION array, dimension ( LDV, N )
+*>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
+*>                         the right singular vectors;
+*>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
+*>                         then V is used as workspace if the pprocedure
+*>                         replaces A with A^t. In that case, [U] is computed
+*>                         in V as right singular vectors of A^t and then
+*>                         copied back to the U array. This 'W' option is just
+*>                         a reminder to the caller that in this case V is
+*>                         reserved as workspace of length N*N.
+*>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V,  LDV >= 1.
+*>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (LWORK)
+*>          On exit, if N > 0 .AND. M > 0 (else not referenced),
+*>          WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
+*>                    that SCALE*SVA(1:N) are the computed singular values
+*>                    of A. (See the description of SVA().)
+*>          WORK(2) = See the description of WORK(1).
+*>          WORK(3) = SCONDA is an estimate for the condition number of
+*>                    column equilibrated A. (If JOBA = 'E' or 'G')
+*>                    SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
+*>                    It is computed using DPOCON. It holds
+*>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
+*>                    where R is the triangular factor from the QRF of A.
+*>                    However, if R is truncated and the numerical rank is
+*>                    determined to be strictly smaller than N, SCONDA is
+*>                    returned as -1, thus indicating that the smallest
+*>                    singular values might be lost.
+*>
+*>          If full SVD is needed, the following two condition numbers are
+*>          useful for the analysis of the algorithm. They are provied for
+*>          a developer/implementer who is familiar with the details of
+*>          the method.
+*>
+*>          WORK(4) = an estimate of the scaled condition number of the
+*>                    triangular factor in the first QR factorization.
+*>          WORK(5) = an estimate of the scaled condition number of the
+*>                    triangular factor in the second QR factorization.
+*>          The following two parameters are computed if JOBT = 'T'.
+*>          They are provided for a developer/implementer who is familiar
+*>          with the details of the method.
+*>
+*>          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
+*>                    of diag(A^t*A) / Trace(A^t*A) taken as point in the
+*>                    probability simplex.
+*>          WORK(7) = the entropy of A*A^t.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          Length of WORK to confirm proper allocation of work space.
+*>          LWORK depends on the job:
+*>
+*>          If only SIGMA is needed (JOBU = 'N', JOBV = 'N') and
+*>            -> .. no scaled condition estimate required (JOBE = 'N'):
+*>               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
+*>               ->> For optimal performance (blocked code) the optimal value
+*>               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
+*>               block size for DGEQP3 and DGEQRF.
+*>               In general, optimal LWORK is computed as
+*>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7).
+*>            -> .. an estimate of the scaled condition number of A is
+*>               required (JOBA='E', 'G'). In this case, LWORK is the maximum
+*>               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
+*>               ->> For optimal performance (blocked code) the optimal value
+*>               is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
+*>               In general, the optimal length LWORK is computed as
+*>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
+*>                                                     N+N*N+LWORK(DPOCON),7).
+*>
+*>          If SIGMA and the right singular vectors are needed (JOBV = 'V'),
+*>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
+*>            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
+*>               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF,
+*>               DORMLQ. In general, the optimal length LWORK is computed as
+*>               LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON),
+*>                       N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)).
+*>
+*>          If SIGMA and the left singular vectors are needed
+*>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
+*>            -> For optimal performance:
+*>               if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
+*>               if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
+*>               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
+*>               In general, the optimal length LWORK is computed as
+*>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
+*>                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
+*>               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or
+*>               M*NB (for JOBU = 'F').
+*>
+*>          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
+*>            -> if JOBV = 'V'
+*>               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
+*>            -> if JOBV = 'J' the minimal requirement is
+*>               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
+*>            -> For optimal performance, LWORK should be additionally
+*>               larger than N+M*NB, where NB is the optimal block size
+*>               for DORMQR.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (M+3*N).
+*>          On exit,
+*>          IWORK(1) = the numerical rank determined after the initial
+*>                     QR factorization with pivoting. See the descriptions
+*>                     of JOBA and JOBR.
+*>          IWORK(2) = the number of the computed nonzero singular values
+*>          IWORK(3) = if nonzero, a warning message:
+*>                     If IWORK(3) = 1 then some of the column norms of A
+*>                     were denormalized floats. The requested high accuracy
+*>                     is not warranted by the data.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
+*>           = 0:  successful exit;
+*>           > 0:  DGEJSV  did not converge in the maximal allowed number
+*>                 of sweeps. The computed values may be inaccurate.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup doubleGEsing
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3,
+*>  DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an
+*>  additional row pivoting can be used as a preprocessor, which in some
+*>  cases results in much higher accuracy. An example is matrix A with the
+*>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
+*>  diagonal matrices and C is well-conditioned matrix. In that case, complete
+*>  pivoting in the first QR factorizations provides accuracy dependent on the
+*>  condition number of C, and independent of D1, D2. Such higher accuracy is
+*>  not completely understood theoretically, but it works well in practice.
+*>  Further, if A can be written as A = B*D, with well-conditioned B and some
+*>  diagonal D, then the high accuracy is guaranteed, both theoretically and
+*>  in software, independent of D. For more details see [1], [2].
+*>     The computational range for the singular values can be the full range
+*>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
+*>  & LAPACK routines called by DGEJSV are implemented to work in that range.
+*>  If that is not the case, then the restriction for safe computation with
+*>  the singular values in the range of normalized IEEE numbers is that the
+*>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
+*>  overflow. This code (DGEJSV) is best used in this restricted range,
+*>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
+*>  returned as zeros. See JOBR for details on this.
+*>     Further, this implementation is somewhat slower than the one described
+*>  in [1,2] due to replacement of some non-LAPACK components, and because
+*>  the choice of some tuning parameters in the iterative part (DGESVJ) is
+*>  left to the implementer on a particular machine.
+*>     The rank revealing QR factorization (in this code: DGEQP3) should be
+*>  implemented as in [3]. We have a new version of DGEQP3 under development
+*>  that is more robust than the current one in LAPACK, with a cleaner cut in
+*>  rank deficient cases. It will be available in the SIGMA library [4].
+*>  If M is much larger than N, it is obvious that the initial QRF with
+*>  column pivoting can be preprocessed by the QRF without pivoting. That
+*>  well known trick is not used in DGEJSV because in some cases heavy row
+*>  weighting can be treated with complete pivoting. The overhead in cases
+*>  M much larger than N is then only due to pivoting, but the benefits in
+*>  terms of accuracy have prevailed. The implementer/user can incorporate
+*>  this extra QRF step easily. The implementer can also improve data movement
+*>  (matrix transpose, matrix copy, matrix transposed copy) - this
+*>  implementation of DGEJSV uses only the simplest, naive data movement.
+*> \endverbatim
+*
+*> \par Contributors:
+*  ==================
+*>
+*>  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
+*
+*> \par References:
+*  ================
+*>
+*> \verbatim
+*>
+*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
+*>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
+*>     LAPACK Working note 169.
+*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
+*>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
+*>     LAPACK Working note 170.
+*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
+*>     factorization software - a case study.
+*>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
+*>     LAPACK Working note 176.
+*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
+*>     QSVD, (H,K)-SVD computations.
+*>     Department of Mathematics, University of Zagreb, 2008.
+*> \endverbatim
+*
+*>  \par Bugs, examples and comments:
+*   =================================
+*>
+*>  Please report all bugs and send interesting examples and/or comments to
+*>  drmac@math.hr. Thank you.
+*>
+*  =====================================================================
+      SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
+     $                   M, N, A, LDA, SVA, U, LDU, V, LDV,
+     $                   WORK, LWORK, IWORK, INFO )
 *
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*
-* This routine is also part of SIGMA (version 1.23, October 23. 2008.)
-* SIGMA is a library of algorithms for highly accurate algorithms for
-* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
-* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
+*     June 2016
 *
 *     .. Scalar Arguments ..
       IMPLICIT    NONE
@@ -22,349 +487,12 @@
 *     ..
 *     .. Array Arguments ..
       DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
-     &            WORK( LWORK )
+     $            WORK( LWORK )
       INTEGER     IWORK( * )
       CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
 *     ..
 *
-*  Purpose
-*  =======
-*
-*  DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
-*  matrix [A], where M >= N. The SVD of [A] is written as
-*
-*               [A] = [U] * [SIGMA] * [V]^t,
-*
-*  where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
-*  diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
-*  [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
-*  the singular values of [A]. The columns of [U] and [V] are the left and
-*  the right singular vectors of [A], respectively. The matrices [U] and [V]
-*  are computed and stored in the arrays U and V, respectively. The diagonal
-*  of [SIGMA] is computed and stored in the array SVA.
-*
-*  Arguments
-*  =========
-*
-*  JOBA    (input) CHARACTER*1
-*        Specifies the level of accuracy:
-*       = 'C': This option works well (high relative accuracy) if A = B * D,
-*             with well-conditioned B and arbitrary diagonal matrix D.
-*             The accuracy cannot be spoiled by COLUMN scaling. The
-*             accuracy of the computed output depends on the condition of
-*             B, and the procedure aims at the best theoretical accuracy.
-*             The relative error max_{i=1:N}|d sigma_i| / sigma_i is
-*             bounded by f(M,N)*epsilon* cond(B), independent of D.
-*             The input matrix is preprocessed with the QRF with column
-*             pivoting. This initial preprocessing and preconditioning by
-*             a rank revealing QR factorization is common for all values of
-*             JOBA. Additional actions are specified as follows:
-*       = 'E': Computation as with 'C' with an additional estimate of the
-*             condition number of B. It provides a realistic error bound.
-*       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
-*             D1, D2, and well-conditioned matrix C, this option gives
-*             higher accuracy than the 'C' option. If the structure of the
-*             input matrix is not known, and relative accuracy is
-*             desirable, then this option is advisable. The input matrix A
-*             is preprocessed with QR factorization with FULL (row and
-*             column) pivoting.
-*       = 'G'  Computation as with 'F' with an additional estimate of the
-*             condition number of B, where A=D*B. If A has heavily weighted
-*             rows, then using this condition number gives too pessimistic
-*             error bound.
-*       = 'A': Small singular values are the noise and the matrix is treated
-*             as numerically rank defficient. The error in the computed
-*             singular values is bounded by f(m,n)*epsilon*||A||.
-*             The computed SVD A = U * S * V^t restores A up to
-*             f(m,n)*epsilon*||A||.
-*             This gives the procedure the licence to discard (set to zero)
-*             all singular values below N*epsilon*||A||.
-*       = 'R': Similar as in 'A'. Rank revealing property of the initial
-*             QR factorization is used do reveal (using triangular factor)
-*             a gap sigma_{r+1} < epsilon * sigma_r in which case the
-*             numerical RANK is declared to be r. The SVD is computed with
-*             absolute error bounds, but more accurately than with 'A'.
-*
-*  JOBU    (input) CHARACTER*1
-*        Specifies whether to compute the columns of U:
-*       = 'U': N columns of U are returned in the array U.
-*       = 'F': full set of M left sing. vectors is returned in the array U.
-*       = 'W': U may be used as workspace of length M*N. See the description
-*             of U.
-*       = 'N': U is not computed.
-*
-*  JOBV    (input) CHARACTER*1
-*        Specifies whether to compute the matrix V:
-*       = 'V': N columns of V are returned in the array V; Jacobi rotations
-*             are not explicitly accumulated.
-*       = 'J': N columns of V are returned in the array V, but they are
-*             computed as the product of Jacobi rotations. This option is
-*             allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
-*       = 'W': V may be used as workspace of length N*N. See the description
-*             of V.
-*       = 'N': V is not computed.
-*
-*  JOBR    (input) CHARACTER*1
-*        Specifies the RANGE for the singular values. Issues the licence to
-*        set to zero small positive singular values if they are outside
-*        specified range. If A .NE. 0 is scaled so that the largest singular
-*        value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
-*        the licence to kill columns of A whose norm in c*A is less than
-*        DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
-*        where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
-*       = 'N': Do not kill small columns of c*A. This option assumes that
-*             BLAS and QR factorizations and triangular solvers are
-*             implemented to work in that range. If the condition of A
-*             is greater than BIG, use DGESVJ.
-*       = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
-*             (roughly, as described above). This option is recommended.
-*                                            ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*        For computing the singular values in the FULL range [SFMIN,BIG]
-*        use DGESVJ.
-*
-*  JOBT    (input) CHARACTER*1
-*        If the matrix is square then the procedure may determine to use
-*        transposed A if A^t seems to be better with respect to convergence.
-*        If the matrix is not square, JOBT is ignored. This is subject to
-*        changes in the future.
-*        The decision is based on two values of entropy over the adjoint
-*        orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
-*       = 'T': transpose if entropy test indicates possibly faster
-*        convergence of Jacobi process if A^t is taken as input. If A is
-*        replaced with A^t, then the row pivoting is included automatically.
-*       = 'N': do not speculate.
-*        This option can be used to compute only the singular values, or the
-*        full SVD (U, SIGMA and V). For only one set of singular vectors
-*        (U or V), the caller should provide both U and V, as one of the
-*        matrices is used as workspace if the matrix A is transposed.
-*        The implementer can easily remove this constraint and make the
-*        code more complicated. See the descriptions of U and V.
-*
-*  JOBP    (input) CHARACTER*1
-*        Issues the licence to introduce structured perturbations to drown
-*        denormalized numbers. This licence should be active if the
-*        denormals are poorly implemented, causing slow computation,
-*        especially in cases of fast convergence (!). For details see [1,2].
-*        For the sake of simplicity, this perturbations are included only
-*        when the full SVD or only the singular values are requested. The
-*        implementer/user can easily add the perturbation for the cases of
-*        computing one set of singular vectors.
-*       = 'P': introduce perturbation
-*       = 'N': do not perturb
-*
-*  M       (input) INTEGER
-*         The number of rows of the input matrix A.  M >= 0.
-*
-*  N       (input) INTEGER
-*         The number of columns of the input matrix A. M >= N >= 0.
-*
-*  A       (input/workspace) DOUBLE PRECISION array, dimension (LDA,N)
-*          On entry, the M-by-N matrix A.
-*
-*  LDA     (input) INTEGER
-*          The leading dimension of the array A.  LDA >= max(1,M).
-*
-*  SVA     (workspace/output) DOUBLE PRECISION array, dimension (N)
-*          On exit,
-*          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
-*            computation SVA contains Euclidean column norms of the
-*            iterated matrices in the array A.
-*          - For WORK(1) .NE. WORK(2): The singular values of A are
-*            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
-*            sigma_max(A) overflows or if small singular values have been
-*            saved from underflow by scaling the input matrix A.
-*          - If JOBR='R' then some of the singular values may be returned
-*            as exact zeros obtained by "set to zero" because they are
-*            below the numerical rank threshold or are denormalized numbers.
-*
-*  U       (workspace/output) DOUBLE PRECISION array, dimension ( LDU, N )
-*          If JOBU = 'U', then U contains on exit the M-by-N matrix of
-*                         the left singular vectors.
-*          If JOBU = 'F', then U contains on exit the M-by-M matrix of
-*                         the left singular vectors, including an ONB
-*                         of the orthogonal complement of the Range(A).
-*          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
-*                         then U is used as workspace if the procedure
-*                         replaces A with A^t. In that case, [V] is computed
-*                         in U as left singular vectors of A^t and then
-*                         copied back to the V array. This 'W' option is just
-*                         a reminder to the caller that in this case U is
-*                         reserved as workspace of length N*N.
-*          If JOBU = 'N'  U is not referenced.
-*
-* LDU      (input) INTEGER
-*          The leading dimension of the array U,  LDU >= 1.
-*          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
-*
-*  V       (workspace/output) DOUBLE PRECISION array, dimension ( LDV, N )
-*          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
-*                         the right singular vectors;
-*          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
-*                         then V is used as workspace if the pprocedure
-*                         replaces A with A^t. In that case, [U] is computed
-*                         in V as right singular vectors of A^t and then
-*                         copied back to the U array. This 'W' option is just
-*                         a reminder to the caller that in this case V is
-*                         reserved as workspace of length N*N.
-*          If JOBV = 'N'  V is not referenced.
-*
-*  LDV     (input) INTEGER
-*          The leading dimension of the array V,  LDV >= 1.
-*          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
-*
-*  WORK    (workspace/output) DOUBLE PRECISION array, dimension at least LWORK.
-*          On exit,
-*          WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
-*                    that SCALE*SVA(1:N) are the computed singular values
-*                    of A. (See the description of SVA().)
-*          WORK(2) = See the description of WORK(1).
-*          WORK(3) = SCONDA is an estimate for the condition number of
-*                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
-*                    SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
-*                    It is computed using DPOCON. It holds
-*                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
-*                    where R is the triangular factor from the QRF of A.
-*                    However, if R is truncated and the numerical rank is
-*                    determined to be strictly smaller than N, SCONDA is
-*                    returned as -1, thus indicating that the smallest
-*                    singular values might be lost.
-*
-*          If full SVD is needed, the following two condition numbers are
-*          useful for the analysis of the algorithm. They are provied for
-*          a developer/implementer who is familiar with the details of
-*          the method.
-*
-*          WORK(4) = an estimate of the scaled condition number of the
-*                    triangular factor in the first QR factorization.
-*          WORK(5) = an estimate of the scaled condition number of the
-*                    triangular factor in the second QR factorization.
-*          The following two parameters are computed if JOBT .EQ. 'T'.
-*          They are provided for a developer/implementer who is familiar
-*          with the details of the method.
-*
-*          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
-*                    of diag(A^t*A) / Trace(A^t*A) taken as point in the
-*                    probability simplex.
-*          WORK(7) = the entropy of A*A^t.
-*
-*  LWORK   (input) INTEGER
-*          Length of WORK to confirm proper allocation of work space.
-*          LWORK depends on the job:
-*
-*          If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
-*            -> .. no scaled condition estimate required ( JOBE.EQ.'N'):
-*               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
-*               For optimal performance (blocked code) the optimal value
-*               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
-*               block size for xGEQP3/xGEQRF.
-*            -> .. an estimate of the scaled condition number of A is
-*               required (JOBA='E', 'G'). In this case, LWORK is the maximum
-*               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4N,7).
-*
-*          If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
-*            -> the minimal requirement is LWORK >= max(2*N+M,7).
-*            -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7),
-*               where NB is the optimal block size.
-*
-*          If SIGMA and the left singular vectors are needed
-*            -> the minimal requirement is LWORK >= max(2*N+M,7).
-*            -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7),
-*               where NB is the optimal block size.
-*
-*          If full SVD is needed ( JOBU.EQ.'U' or 'F', JOBV.EQ.'V' ) and
-*            -> .. the singular vectors are computed without explicit
-*               accumulation of the Jacobi rotations, LWORK >= 6*N+2*N*N
-*            -> .. in the iterative part, the Jacobi rotations are
-*               explicitly accumulated (option, see the description of JOBV),
-*               then the minimal requirement is LWORK >= max(M+3*N+N*N,7).
-*               For better performance, if NB is the optimal block size,
-*               LWORK >= max(3*N+N*N+M,3*N+N*N+N*NB,7).
-*
-*  IWORK   (workspace/output) INTEGER array, dimension M+3*N.
-*          On exit,
-*          IWORK(1) = the numerical rank determined after the initial
-*                     QR factorization with pivoting. See the descriptions
-*                     of JOBA and JOBR.
-*          IWORK(2) = the number of the computed nonzero singular values
-*          IWORK(3) = if nonzero, a warning message:
-*                     If IWORK(3).EQ.1 then some of the column norms of A
-*                     were denormalized floats. The requested high accuracy
-*                     is not warranted by the data.
-*
-*  INFO    (output) INTEGER
-*           < 0  : if INFO = -i, then the i-th argument had an illegal value.
-*           = 0 :  successfull exit;
-*           > 0 :  DGEJSV  did not converge in the maximal allowed number
-*                  of sweeps. The computed values may be inaccurate.
-*
-*  Further Details
-*  ===============
-*
-*  DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3,
-*  SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an
-*  additional row pivoting can be used as a preprocessor, which in some
-*  cases results in much higher accuracy. An example is matrix A with the
-*  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
-*  diagonal matrices and C is well-conditioned matrix. In that case, complete
-*  pivoting in the first QR factorizations provides accuracy dependent on the
-*  condition number of C, and independent of D1, D2. Such higher accuracy is
-*  not completely understood theoretically, but it works well in practice.
-*  Further, if A can be written as A = B*D, with well-conditioned B and some
-*  diagonal D, then the high accuracy is guaranteed, both theoretically and
-*  in software, independent of D. For more details see [1], [2].
-*     The computational range for the singular values can be the full range
-*  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
-*  & LAPACK routines called by DGEJSV are implemented to work in that range.
-*  If that is not the case, then the restriction for safe computation with
-*  the singular values in the range of normalized IEEE numbers is that the
-*  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
-*  overflow. This code (DGEJSV) is best used in this restricted range,
-*  meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
-*  returned as zeros. See JOBR for details on this.
-*     Further, this implementation is somewhat slower than the one described
-*  in [1,2] due to replacement of some non-LAPACK components, and because
-*  the choice of some tuning parameters in the iterative part (DGESVJ) is
-*  left to the implementer on a particular machine.
-*     The rank revealing QR factorization (in this code: SGEQP3) should be
-*  implemented as in [3]. We have a new version of SGEQP3 under development
-*  that is more robust than the current one in LAPACK, with a cleaner cut in
-*  rank defficient cases. It will be available in the SIGMA library [4].
-*  If M is much larger than N, it is obvious that the inital QRF with
-*  column pivoting can be preprocessed by the QRF without pivoting. That
-*  well known trick is not used in DGEJSV because in some cases heavy row
-*  weighting can be treated with complete pivoting. The overhead in cases
-*  M much larger than N is then only due to pivoting, but the benefits in
-*  terms of accuracy have prevailed. The implementer/user can incorporate
-*  this extra QRF step easily. The implementer can also improve data movement
-*  (matrix transpose, matrix copy, matrix transposed copy) - this
-*  implementation of DGEJSV uses only the simplest, naive data movement.
-*
-*  Contributors
-*
-*  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
-*
-*  References
-*
-* [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
-*     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
-*     LAPACK Working note 169.
-* [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
-*     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
-*     LAPACK Working note 170.
-* [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
-*     factorization software - a case study.
-*     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
-*     LAPACK Working note 176.
-* [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
-*     QSVD, (H,K)-SVD computations.
-*     Department of Mathematics, University of Zagreb, 2008.
-*
-*  Bugs, examples and comments
-* 
-*  Please report all bugs and send interesting examples and/or comments to
-*  drmac@math.hr. Thank you.
-*
-* ==========================================================================
+*  ===========================================================================
 *
 *     .. Local Parameters ..
       DOUBLE PRECISION   ZERO,  ONE
@@ -372,16 +500,15 @@
 *     ..
 *     .. Local Scalars ..
       DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
-     &        CONDR1, CONDR2, ENTRA,  ENTRAT, EPSLN,  MAXPRJ, SCALEM,
-     &        SCONDA, SFMIN,  SMALL,  TEMP1,  USCAL1, USCAL2, XSC
+     $        CONDR1, CONDR2, ENTRA,  ENTRAT, EPSLN,  MAXPRJ, SCALEM,
+     $        SCONDA, SFMIN,  SMALL,  TEMP1,  USCAL1, USCAL2, XSC
       INTEGER IERR,   N1,     NR,     NUMRANK,        p, q,   WARNING
       LOGICAL ALMORT, DEFR,   ERREST, GOSCAL, JRACC,  KILL,   LSVEC,
-     &        L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
-     &        NOSCAL, ROWPIV, RSVEC,  TRANSP
+     $        L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
+     $        NOSCAL, ROWPIV, RSVEC,  TRANSP
 *     ..
 *     .. Intrinsic Functions ..
-      INTRINSIC DABS,  DLOG, DMAX1, DMIN1, DBLE,
-     &          MAX0, MIN0, IDNINT,  DSIGN,  DSQRT
+      INTRINSIC DABS, DLOG, MAX, MIN, DBLE, IDNINT, DSIGN, DSQRT
 *     ..
 *     .. External Functions ..
       DOUBLE PRECISION  DLAMCH, DNRM2
@@ -391,8 +518,8 @@
 *     ..
 *     .. External Subroutines ..
       EXTERNAL  DCOPY,  DGELQF, DGEQP3, DGEQRF, DLACPY, DLASCL,
-     &          DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ,
-     &          DORMQR, DPOCON, DSCAL,  DSWAP,  DTRSM,  XERBLA
+     $          DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ,
+     $          DORMQR, DPOCON, DSCAL,  DSWAP,  DTRSM,  XERBLA
 *
       EXTERNAL  DGESVJ
 *     ..
@@ -412,13 +539,13 @@
       L2PERT = LSAME( JOBP, 'P' )
 *
       IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR.
-     &     ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
+     $     ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
          INFO = - 1
       ELSE IF ( .NOT.( LSVEC  .OR. LSAME( JOBU, 'N' ) .OR.
-     &                             LSAME( JOBU, 'W' )) ) THEN
+     $                             LSAME( JOBU, 'W' )) ) THEN
          INFO = - 2
       ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR.
-     &   LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
+     $   LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
          INFO = - 3
       ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) )    THEN
          INFO = - 4
@@ -435,15 +562,19 @@
       ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN
          INFO = - 13
       ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN
-         INFO = - 14
+         INFO = - 15
       ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND.
-     &                           (LWORK .LT. MAX0(7,4*N+1,2*M+N))) .OR.
-     & (.NOT.(LSVEC .OR. LSVEC) .AND. ERREST .AND.
-     &                         (LWORK .LT. MAX0(7,4*N+N*N,2*M+N))) .OR.
-     & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX0(7,2*N+M))) .OR.
-     & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX0(7,2*N+M))) .OR.
-     & (LSVEC .AND. RSVEC .AND. .NOT.JRACC .AND. (LWORK.LT.6*N+2*N*N))
-     & .OR. (LSVEC.AND.RSVEC.AND.JRACC.AND.LWORK.LT.MAX0(7,M+3*N+N*N)))
+     &                           (LWORK .LT. MAX(7,4*N+1,2*M+N))) .OR.
+     & (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND.
+     &                         (LWORK .LT. MAX(7,4*N+N*N,2*M+N))) .OR.
+     & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1)))
+     & .OR.
+     & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1)))
+     & .OR.
+     & (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND.
+     &                          (LWORK.LT.MAX(2*M+N,6*N+2*N*N)))
+     & .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND.
+     &                          LWORK.LT.MAX(2*M+N,4*N+N*N,2*N+N*N+6)))
      &   THEN
          INFO = - 17
       ELSE
@@ -454,11 +585,16 @@
       IF ( INFO .NE. 0 ) THEN
 *       #:(
          CALL XERBLA( 'DGEJSV', - INFO )
+         RETURN
       END IF
 *
 *     Quick return for void matrix (Y3K safe)
 * #:)
-      IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) RETURN
+      IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN
+         IWORK(1:3) = 0
+         WORK(1:7) = 0
+         RETURN
+      ENDIF
 *
 *     Determine whether the matrix U should be M x N or M x M
 *
@@ -471,7 +607,6 @@
 *
 *!    NOTE: Make sure DLAMCH() does not fail on the target architecture.
 *
-
       EPSLN = DLAMCH('Epsilon')
       SFMIN = DLAMCH('SafeMinimum')
       SMALL = SFMIN / EPSLN
@@ -514,8 +649,8 @@
       AAPP = ZERO
       AAQQ = BIG
       DO 4781 p = 1, N
-         AAPP = DMAX1( AAPP, SVA(p) )
-         IF ( SVA(p) .NE. ZERO ) AAQQ = DMIN1( AAQQ, SVA(p) )
+         AAPP = MAX( AAPP, SVA(p) )
+         IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) )
  4781 CONTINUE
 *
 *     Quick return for zero M x N matrix
@@ -536,6 +671,7 @@
          END IF
          IWORK(1) = 0
          IWORK(2) = 0
+         IWORK(3) = 0
          RETURN
       END IF
 *
@@ -584,6 +720,7 @@
             IWORK(1) = 0
             IWORK(2) = 0
          END IF
+         IWORK(3) = 0
          IF ( ERREST ) WORK(3) = ONE
          IF ( LSVEC .AND. RSVEC ) THEN
             WORK(4) = ONE
@@ -618,14 +755,14 @@
 *              in one pass through the vector
                WORK(M+N+p)  = XSC * SCALEM
                WORK(N+p)    = XSC * (SCALEM*DSQRT(TEMP1))
-               AATMAX = DMAX1( AATMAX, WORK(N+p) )
-               IF (WORK(N+p) .NE. ZERO) AATMIN = DMIN1(AATMIN,WORK(N+p))
+               AATMAX = MAX( AATMAX, WORK(N+p) )
+               IF (WORK(N+p) .NE. ZERO) AATMIN = MIN(AATMIN,WORK(N+p))
  1950       CONTINUE
          ELSE
             DO 1904 p = 1, M
                WORK(M+N+p) = SCALEM*DABS( A(p,IDAMAX(N,A(p,1),LDA)) )
-               AATMAX = DMAX1( AATMAX, WORK(M+N+p) )
-               AATMIN = DMIN1( AATMIN, WORK(M+N+p) )
+               AATMAX = MAX( AATMAX, WORK(M+N+p) )
+               AATMIN = MIN( AATMIN, WORK(M+N+p) )
  1904       CONTINUE
          END IF
 *
@@ -816,7 +953,7 @@
       IF ( L2ABER ) THEN
 *        Standard absolute error bound suffices. All sigma_i with
 *        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
-*        agressive enforcement of lower numerical rank by introducing a
+*        aggressive enforcement of lower numerical rank by introducing a
 *        backward error of the order of N*EPSLN*||A||.
          TEMP1 = DSQRT(DBLE(N))*EPSLN
          DO 3001 p = 2, N
@@ -828,14 +965,14 @@
  3001    CONTINUE
  3002    CONTINUE
       ELSE IF ( L2RANK ) THEN
-*        .. similarly as above, only slightly more gentle (less agressive).
+*        .. similarly as above, only slightly more gentle (less aggressive).
 *        Sudden drop on the diagonal of R1 is used as the criterion for
-*        close-to-rank-defficient.
+*        close-to-rank-deficient.
          TEMP1 = DSQRT(SFMIN)
          DO 3401 p = 2, N
             IF ( ( DABS(A(p,p)) .LT. (EPSLN*DABS(A(p-1,p-1))) ) .OR.
-     &           ( DABS(A(p,p)) .LT. SMALL ) .OR.
-     &           ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
+     $           ( DABS(A(p,p)) .LT. SMALL ) .OR.
+     $           ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
             NR = NR + 1
  3401    CONTINUE
  3402    CONTINUE
@@ -851,7 +988,7 @@
          TEMP1  = DSQRT(SFMIN)
          DO 3301 p = 2, N
             IF ( ( DABS(A(p,p)) .LT. SMALL ) .OR.
-     &          ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
+     $          ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
             NR = NR + 1
  3301    CONTINUE
  3302    CONTINUE
@@ -863,7 +1000,7 @@
          MAXPRJ = ONE
          DO 3051 p = 2, N
             TEMP1  = DABS(A(p,p)) / SVA(IWORK(p))
-            MAXPRJ = DMIN1( MAXPRJ, TEMP1 )
+            MAXPRJ = MIN( MAXPRJ, TEMP1 )
  3051    CONTINUE
          IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE.
       END IF
@@ -883,7 +1020,7 @@
                   CALL DSCAL( p, ONE/TEMP1, V(1,p), 1 )
  3053          CONTINUE
                CALL DPOCON( 'U', N, V, LDV, ONE, TEMP1,
-     &              WORK(N+1), IWORK(2*N+M+1), IERR )
+     $              WORK(N+1), IWORK(2*N+M+1), IERR )
             ELSE IF ( LSVEC ) THEN
 *              .. U is available as workspace
                CALL DLACPY( 'U', N, N, A, LDA, U, LDU )
@@ -892,7 +1029,7 @@
                   CALL DSCAL( p, ONE/TEMP1, U(1,p), 1 )
  3054          CONTINUE
                CALL DPOCON( 'U', N, U, LDU, ONE, TEMP1,
-     &              WORK(N+1), IWORK(2*N+M+1), IERR )
+     $              WORK(N+1), IWORK(2*N+M+1), IERR )
             ELSE
                CALL DLACPY( 'U', N, N, A, LDA, WORK(N+1), N )
                DO 3052 p = 1, N
@@ -901,7 +1038,7 @@
  3052          CONTINUE
 *           .. the columns of R are scaled to have unit Euclidean lengths.
                CALL DPOCON( 'U', N, WORK(N+1), N, ONE, TEMP1,
-     &              WORK(N+N*N+1), IWORK(2*N+M+1), IERR )
+     $              WORK(N+N*N+1), IWORK(2*N+M+1), IERR )
             END IF
             SCONDA = ONE / DSQRT(TEMP1)
 *           SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
@@ -916,13 +1053,12 @@
 *
 *     Phase 3:
 *
-
       IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
 *
 *         Singular Values only
 *
 *         .. transpose A(1:NR,1:N)
-         DO 1946 p = 1, MIN0( N-1, NR )
+         DO 1946 p = 1, MIN( N-1, NR )
             CALL DCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 )
  1946    CONTINUE
 *
@@ -947,8 +1083,8 @@
                   TEMP1 = XSC*DABS(A(q,q))
                   DO 4949 p = 1, N
                      IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
-     &                    .OR. ( p .LT. q ) )
-     &                     A(p,q) = DSIGN( TEMP1, A(p,q) )
+     $                    .OR. ( p .LT. q ) )
+     $                     A(p,q) = DSIGN( TEMP1, A(p,q) )
  4949             CONTINUE
  4947          CONTINUE
             ELSE
@@ -977,8 +1113,8 @@
                   TEMP1 = XSC*DABS(A(q,q))
                   DO 1949 p = 1, NR
                      IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
-     &                       .OR. ( p .LT. q ) )
-     &                   A(p,q) = DSIGN( TEMP1, A(p,q) )
+     $                       .OR. ( p .LT. q ) )
+     $                   A(p,q) = DSIGN( TEMP1, A(p,q) )
  1949             CONTINUE
  1947          CONTINUE
             ELSE
@@ -990,7 +1126,7 @@
 *           the part which destroys triangular form (confusing?!))
 *
             CALL DGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA,
-     &                      N, V, LDV, WORK, LWORK, INFO )
+     $                      N, V, LDV, WORK, LWORK, INFO )
 *
             SCALEM  = WORK(1)
             NUMRANK = IDNINT(WORK(2))
@@ -1009,7 +1145,7 @@
             CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
 *
             CALL DGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA,
-     &                  WORK, LWORK, INFO )
+     $                  WORK, LWORK, INFO )
             SCALEM  = WORK(1)
             NUMRANK = IDNINT(WORK(2))
 
@@ -1023,14 +1159,14 @@
             CALL DLACPY( 'Lower', NR, NR, A, LDA, V, LDV )
             CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
             CALL DGEQRF( NR, NR, V, LDV, WORK(N+1), WORK(2*N+1),
-     &                   LWORK-2*N, IERR )
+     $                   LWORK-2*N, IERR )
             DO 8998 p = 1, NR
                CALL DCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 )
  8998       CONTINUE
             CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
 *
             CALL DGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U,
-     &                  LDU, WORK(N+1), LWORK, INFO )
+     $                  LDU, WORK(N+1), LWORK, INFO )
             SCALEM  = WORK(N+1)
             NUMRANK = IDNINT(WORK(N+2))
             IF ( NR .LT. N ) THEN
@@ -1040,7 +1176,7 @@
             END IF
 *
          CALL DORMLQ( 'Left', 'Transpose', N, N, NR, A, LDA, WORK,
-     &               V, LDV, WORK(N+1), LWORK-N, IERR )
+     $               V, LDV, WORK(N+1), LWORK-N, IERR )
 *
          END IF
 *
@@ -1065,7 +1201,7 @@
          CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
 *
          CALL DGEQRF( N, NR, U, LDU, WORK(N+1), WORK(2*N+1),
-     &              LWORK-2*N, IERR )
+     $              LWORK-2*N, IERR )
 *
          DO 1967 p = 1, NR - 1
             CALL DCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 )
@@ -1073,7 +1209,7 @@
          CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
 *
          CALL DGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A,
-     &        LDA, WORK(N+1), LWORK-N, INFO )
+     $        LDA, WORK(N+1), LWORK-N, INFO )
          SCALEM  = WORK(N+1)
          NUMRANK = IDNINT(WORK(N+2))
 *
@@ -1086,10 +1222,10 @@
          END IF
 *
          CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
-     &               LDU, WORK(N+1), LWORK-N, IERR )
+     $               LDU, WORK(N+1), LWORK-N, IERR )
 *
          IF ( ROWPIV )
-     &       CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+     $       CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 *
          DO 1974 p = 1, N1
             XSC = ONE / DNRM2( M, U(1,p), 1 )
@@ -1137,9 +1273,9 @@
                   TEMP1 = XSC*DABS( V(q,q) )
                   DO 2968 p = 1, N
                      IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
-     &                   .OR. ( p .LT. q ) )
-     &                   V(p,q) = DSIGN( TEMP1, V(p,q) )
-                     IF ( p. LT. q ) V(p,q) = - V(p,q)
+     $                   .OR. ( p .LT. q ) )
+     $                   V(p,q) = DSIGN( TEMP1, V(p,q) )
+                     IF ( p .LT. q ) V(p,q) = - V(p,q)
  2968             CONTINUE
  2969          CONTINUE
             ELSE
@@ -1156,9 +1292,9 @@
                CALL DSCAL(NR-p+1,ONE/TEMP1,WORK(2*N+(p-1)*NR+p),1)
  3950       CONTINUE
             CALL DPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1,
-     &                   WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
+     $                   WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
             CONDR1 = ONE / DSQRT(TEMP1)
-*           .. here need a second oppinion on the condition number
+*           .. here need a second opinion on the condition number
 *           .. then assume worst case scenario
 *           R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
 *           more conservative    <=> CONDR1 .LT. DSQRT(DBLE(N))
@@ -1172,22 +1308,22 @@
 *              of a lower triangular matrix.
 *              R1^t = Q2 * R2
                CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
-     &              LWORK-2*N, IERR )
+     $              LWORK-2*N, IERR )
 *
                IF ( L2PERT ) THEN
                   XSC = DSQRT(SMALL)/EPSLN
                   DO 3959 p = 2, NR
                      DO 3958 q = 1, p - 1
-                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
+                        TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q)))
                         IF ( DABS(V(q,p)) .LE. TEMP1 )
-     &                     V(q,p) = DSIGN( TEMP1, V(q,p) )
+     $                     V(q,p) = DSIGN( TEMP1, V(q,p) )
  3958                CONTINUE
  3959             CONTINUE
                END IF
 *
                IF ( NR .NE. N )
+     $         CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
 *              .. save ...
-     &         CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
 *
 *           .. this transposed copy should be better than naive
                DO 1969 p = 1, NR - 1
@@ -1199,7 +1335,7 @@
             ELSE
 *
 *              .. ill-conditioned case: second QRF with pivoting
-*              Note that windowed pivoting would be equaly good
+*              Note that windowed pivoting would be equally good
 *              numerically, and more run-time efficient. So, in
 *              an optimal implementation, the next call to DGEQP3
 *              should be replaced with eg. CALL SGEQPX (ACM TOMS #782)
@@ -1210,16 +1346,16 @@
                   IWORK(N+p) = 0
  3003          CONTINUE
                CALL DGEQP3( N, NR, V, LDV, IWORK(N+1), WORK(N+1),
-     &                  WORK(2*N+1), LWORK-2*N, IERR )
+     $                  WORK(2*N+1), LWORK-2*N, IERR )
 **               CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
-**     &              LWORK-2*N, IERR )
+**     $              LWORK-2*N, IERR )
                IF ( L2PERT ) THEN
                   XSC = DSQRT(SMALL)
                   DO 3969 p = 2, NR
                      DO 3968 q = 1, p - 1
-                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
+                        TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q)))
                         IF ( DABS(V(q,p)) .LE. TEMP1 )
-     &                     V(q,p) = DSIGN( TEMP1, V(q,p) )
+     $                     V(q,p) = DSIGN( TEMP1, V(q,p) )
  3968                CONTINUE
  3969             CONTINUE
                END IF
@@ -1230,7 +1366,7 @@
                   XSC = DSQRT(SMALL)
                   DO 8970 p = 2, NR
                      DO 8971 q = 1, p - 1
-                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
+                        TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q)))
                         V(p,q) = - DSIGN( TEMP1, V(q,p) )
  8971                CONTINUE
  8970             CONTINUE
@@ -1239,7 +1375,7 @@
                END IF
 *              Now, compute R2 = L3 * Q3, the LQ factorization.
                CALL DGELQF( NR, NR, V, LDV, WORK(2*N+N*NR+1),
-     &               WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
+     $               WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
 *              .. and estimate the condition number
                CALL DLACPY( 'L',NR,NR,V,LDV,WORK(2*N+N*NR+NR+1),NR )
                DO 4950 p = 1, NR
@@ -1247,12 +1383,12 @@
                   CALL DSCAL( p, ONE/TEMP1, WORK(2*N+N*NR+NR+p), NR )
  4950          CONTINUE
                CALL DPOCON( 'L',NR,WORK(2*N+N*NR+NR+1),NR,ONE,TEMP1,
-     &              WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR )
+     $              WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR )
                CONDR2 = ONE / DSQRT(TEMP1)
 *
                IF ( CONDR2 .GE. COND_OK ) THEN
 *                 .. save the Householder vectors used for Q3
-*                 (this overwrittes the copy of R2, as it will not be
+*                 (this overwrites the copy of R2, as it will not be
 *                 needed in this branch, but it does not overwritte the
 *                 Huseholder vectors of Q2.).
                   CALL DLACPY( 'U', NR, NR, V, LDV, WORK(2*N+1), N )
@@ -1284,7 +1420,7 @@
             IF ( CONDR1 .LT. COND_OK ) THEN
 *
                CALL DGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U,
-     &              LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO )
+     $              LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO )
                SCALEM  = WORK(2*N+N*NR+NR+1)
                NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
                DO 3970 p = 1, NR
@@ -1294,7 +1430,7 @@
 
 *        .. pick the right matrix equation and solve it
 *
-               IF ( NR. EQ. N ) THEN
+               IF ( NR .EQ. N ) THEN
 * :))             .. best case, R1 is inverted. The solution of this matrix
 *                 equation is Q2*V2 = the product of the Jacobi rotations
 *                 used in DGESVJ, premultiplied with the orthogonal matrix
@@ -1306,14 +1442,14 @@
 *                 used in DGESVJ. The Q-factor from the second QR
 *                 factorization is then built in explicitly.
                   CALL DTRSM('L','U','T','N',NR,NR,ONE,WORK(2*N+1),
-     &                 N,V,LDV)
+     $                 N,V,LDV)
                   IF ( NR .LT. N ) THEN
                     CALL DLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV)
                     CALL DLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
                     CALL DLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV)
                   END IF
                   CALL DORMQR('L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
-     &                 V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
+     $                 V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
                END IF
 *
             ELSE IF ( CONDR2 .LT. COND_OK ) THEN
@@ -1325,7 +1461,7 @@
 *              the lower triangular L3 from the LQ factorization of
 *              R2=L3*Q3), pre-multiplied with the transposed Q3.
                CALL DGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U,
-     &              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
+     $              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
                SCALEM  = WORK(2*N+N*NR+NR+1)
                NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
                DO 3870 p = 1, NR
@@ -1348,7 +1484,7 @@
                   CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
                END IF
                CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
-     &              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+     $              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
             ELSE
 *              Last line of defense.
 * #:(          This is a rather pathological case: no scaled condition
@@ -1362,7 +1498,7 @@
 *              Compute the full SVD of L3 using DGESVJ with explicit
 *              accumulation of Jacobi rotations.
                CALL DGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U,
-     &              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
+     $              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
                SCALEM  = WORK(2*N+N*NR+NR+1)
                NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
                IF ( NR .LT. N ) THEN
@@ -1371,11 +1507,11 @@
                   CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
                END IF
                CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
-     &              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+     $              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 *
                CALL DORMLQ( 'L', 'T', NR, NR, NR, WORK(2*N+1), N,
-     &              WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1),
-     &              LWORK-2*N-N*NR-NR, IERR )
+     $              WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1),
+     $              LWORK-2*N-N*NR-NR, IERR )
                DO 773 q = 1, NR
                   DO 772 p = 1, NR
                      WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
@@ -1401,7 +1537,7 @@
   973          CONTINUE
                XSC = ONE / DNRM2( N, V(1,q), 1 )
                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
-     &           CALL DSCAL( N, XSC, V(1,q), 1 )
+     $           CALL DSCAL( N, XSC, V(1,q), 1 )
  1972       CONTINUE
 *           At this moment, V contains the right singular vectors of A.
 *           Next, assemble the left singular vector matrix U (M x N).
@@ -1417,21 +1553,21 @@
 *           matrix U. This applies to all cases.
 *
             CALL DORMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, WORK, U,
-     &           LDU, WORK(N+1), LWORK-N, IERR )
+     $           LDU, WORK(N+1), LWORK-N, IERR )
 
 *           The columns of U are normalized. The cost is O(M*N) flops.
             TEMP1 = DSQRT(DBLE(M)) * EPSLN
             DO 1973 p = 1, NR
                XSC = ONE / DNRM2( M, U(1,p), 1 )
                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
-     &          CALL DSCAL( M, XSC, U(1,p), 1 )
+     $          CALL DSCAL( M, XSC, U(1,p), 1 )
  1973       CONTINUE
 *
 *           If the initial QRF is computed with row pivoting, the left
 *           singular vectors must be adjusted.
 *
             IF ( ROWPIV )
-     &          CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+     $          CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 *
          ELSE
 *
@@ -1452,7 +1588,7 @@
             END IF
 *
             CALL DGESVJ( 'Upper', 'U', 'N', N, N, WORK(N+1), N, SVA,
-     &           N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO )
+     $           N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO )
 *
             SCALEM  = WORK(N+N*N+1)
             NUMRANK = IDNINT(WORK(N+N*N+2))
@@ -1462,7 +1598,7 @@
  6970       CONTINUE
 *
             CALL DTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N,
-     &           ONE, A, LDA, WORK(N+1), N )
+     $           ONE, A, LDA, WORK(N+1), N )
             DO 6972 p = 1, N
                CALL DCOPY( N, WORK(N+p), N, V(IWORK(p),1), LDV )
  6972       CONTINUE
@@ -1470,7 +1606,7 @@
             DO 6971 p = 1, N
                XSC = ONE / DNRM2( N, V(1,p), 1 )
                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
-     &            CALL DSCAL( N, XSC, V(1,p), 1 )
+     $            CALL DSCAL( N, XSC, V(1,p), 1 )
  6971       CONTINUE
 *
 *           Assemble the left singular vector matrix U (M x N).
@@ -1483,16 +1619,16 @@
                END IF
             END IF
             CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
-     &           LDU, WORK(N+1), LWORK-N, IERR )
+     $           LDU, WORK(N+1), LWORK-N, IERR )
             TEMP1 = DSQRT(DBLE(M))*EPSLN
             DO 6973 p = 1, N1
                XSC = ONE / DNRM2( M, U(1,p), 1 )
                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
-     &            CALL DSCAL( M, XSC, U(1,p), 1 )
+     $            CALL DSCAL( M, XSC, U(1,p), 1 )
  6973       CONTINUE
 *
             IF ( ROWPIV )
-     &         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+     $         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 *
          END IF
 *
@@ -1502,7 +1638,7 @@
 *
 *        This branch deploys a preconditioned Jacobi SVD with explicitly
 *        accumulated rotations. It is included as optional, mainly for
-*        experimental purposes. It does perfom well, and can also be used.
+*        experimental purposes. It does perform well, and can also be used.
 *        In this implementation, this branch will be automatically activated
 *        if the  condition number sigma_max(A) / sigma_min(A) is predicted
 *        to be greater than the overflow threshold. This is because the
@@ -1520,9 +1656,9 @@
                TEMP1 = XSC*DABS( V(q,q) )
                DO 5968 p = 1, N
                   IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
-     &                .OR. ( p .LT. q ) )
-     &                V(p,q) = DSIGN( TEMP1, V(p,q) )
-                  IF ( p. LT. q ) V(p,q) = - V(p,q)
+     $                .OR. ( p .LT. q ) )
+     $                V(p,q) = DSIGN( TEMP1, V(p,q) )
+                  IF ( p .LT. q ) V(p,q) = - V(p,q)
  5968          CONTINUE
  5969       CONTINUE
          ELSE
@@ -1530,7 +1666,7 @@
          END IF
 
          CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
-     &        LWORK-2*N, IERR )
+     $        LWORK-2*N, IERR )
          CALL DLACPY( 'L', N, NR, V, LDV, WORK(2*N+1), N )
 *
          DO 7969 p = 1, NR
@@ -1541,7 +1677,7 @@
             XSC = DSQRT(SMALL/EPSLN)
             DO 9970 q = 2, NR
                DO 9971 p = 1, q - 1
-                  TEMP1 = XSC * DMIN1(DABS(U(p,p)),DABS(U(q,q)))
+                  TEMP1 = XSC * MIN(DABS(U(p,p)),DABS(U(q,q)))
                   U(p,q) = - DSIGN( TEMP1, U(q,p) )
  9971          CONTINUE
  9970       CONTINUE
@@ -1550,7 +1686,7 @@
          END IF
 
          CALL DGESVJ( 'G', 'U', 'V', NR, NR, U, LDU, SVA,
-     &        N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO )
+     $        N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO )
          SCALEM  = WORK(2*N+N*NR+1)
          NUMRANK = IDNINT(WORK(2*N+N*NR+2))
 
@@ -1561,7 +1697,7 @@
          END IF
 
          CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
-     &        V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
+     $        V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 *
 *           Permute the rows of V using the (column) permutation from the
 *           first QRF. Also, scale the columns to make them unit in
@@ -1577,7 +1713,7 @@
  8973          CONTINUE
                XSC = ONE / DNRM2( N, V(1,q), 1 )
                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
-     &           CALL DSCAL( N, XSC, V(1,q), 1 )
+     $           CALL DSCAL( N, XSC, V(1,q), 1 )
  7972       CONTINUE
 *
 *           At this moment, V contains the right singular vectors of A.
@@ -1592,10 +1728,10 @@
          END IF
 *
          CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
-     &        LDU, WORK(N+1), LWORK-N, IERR )
+     $        LDU, WORK(N+1), LWORK-N, IERR )
 *
             IF ( ROWPIV )
-     &         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
+     $         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 *
 *
          END IF