File:  [local] / rpl / lapack / lapack / zgejsv.f
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Sat Aug 27 15:27:12 2016 UTC (7 years, 9 months ago) by bertrand
Branches: MAIN
CVS tags: HEAD

Mise à jour de lapack.

    1: *> \brief \b ZGEJSV
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at 
    6: *            http://www.netlib.org/lapack/explore-html/ 
    7: *
    8: *> \htmlonly
    9: *> Download ZGEJSV + dependencies 
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgejsv.f"> 
   11: *> [TGZ]</a> 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgejsv.f"> 
   13: *> [ZIP]</a> 
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgejsv.f"> 
   15: *> [TXT]</a>
   16: *> \endhtmlonly 
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *     SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
   22: *                         M, N, A, LDA, SVA, U, LDU, V, LDV,
   23: *                         CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
   24:    25: *     .. Scalar Arguments ..
   26: *     IMPLICIT    NONE
   27: *     INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N
   28: *     ..
   29: *     .. Array Arguments ..
   30: *     COMPLEX*16     A( LDA, * ),  U( LDU, * ), V( LDV, * ), CWORK( LWORK )
   31: *     DOUBLE PRECISION   SVA( N ), RWORK( LRWORK )      
   32: *     INTEGER     IWORK( * )
   33: *     CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
   34: *       ..
   35: *  
   36: *
   37: *> \par Purpose:
   38: *  =============
   39: *>
   40: *> \verbatim
   41: *>
   42: *> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
   43: *> matrix [A], where M >= N. The SVD of [A] is written as
   44: *>
   45: *>              [A] = [U] * [SIGMA] * [V]^*,
   46: *>
   47: *> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
   48: *> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
   49: *> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
   50: *> the singular values of [A]. The columns of [U] and [V] are the left and
   51: *> the right singular vectors of [A], respectively. The matrices [U] and [V]
   52: *> are computed and stored in the arrays U and V, respectively. The diagonal
   53: *> of [SIGMA] is computed and stored in the array SVA.
   54: *> \endverbatim
   55: *>
   56: *>  Arguments:
   57: *>  ==========
   58: *>
   59: *> \param[in] JOBA
   60: *> \verbatim
   61: *>          JOBA is CHARACTER*1
   62: *>         Specifies the level of accuracy:
   63: *>       = 'C': This option works well (high relative accuracy) if A = B * D,
   64: *>              with well-conditioned B and arbitrary diagonal matrix D.
   65: *>              The accuracy cannot be spoiled by COLUMN scaling. The
   66: *>              accuracy of the computed output depends on the condition of
   67: *>              B, and the procedure aims at the best theoretical accuracy.
   68: *>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
   69: *>              bounded by f(M,N)*epsilon* cond(B), independent of D.
   70: *>              The input matrix is preprocessed with the QRF with column
   71: *>              pivoting. This initial preprocessing and preconditioning by
   72: *>              a rank revealing QR factorization is common for all values of
   73: *>              JOBA. Additional actions are specified as follows:
   74: *>       = 'E': Computation as with 'C' with an additional estimate of the
   75: *>              condition number of B. It provides a realistic error bound.
   76: *>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
   77: *>              D1, D2, and well-conditioned matrix C, this option gives
   78: *>              higher accuracy than the 'C' option. If the structure of the
   79: *>              input matrix is not known, and relative accuracy is
   80: *>              desirable, then this option is advisable. The input matrix A
   81: *>              is preprocessed with QR factorization with FULL (row and
   82: *>              column) pivoting.
   83: *>       = 'G'  Computation as with 'F' with an additional estimate of the
   84: *>              condition number of B, where A=D*B. If A has heavily weighted
   85: *>              rows, then using this condition number gives too pessimistic
   86: *>              error bound.
   87: *>       = 'A': Small singular values are the noise and the matrix is treated
   88: *>              as numerically rank defficient. The error in the computed
   89: *>              singular values is bounded by f(m,n)*epsilon*||A||.
   90: *>              The computed SVD A = U * S * V^* restores A up to
   91: *>              f(m,n)*epsilon*||A||.
   92: *>              This gives the procedure the licence to discard (set to zero)
   93: *>              all singular values below N*epsilon*||A||.
   94: *>       = 'R': Similar as in 'A'. Rank revealing property of the initial
   95: *>              QR factorization is used do reveal (using triangular factor)
   96: *>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
   97: *>              numerical RANK is declared to be r. The SVD is computed with
   98: *>              absolute error bounds, but more accurately than with 'A'.
   99: *> \endverbatim
  100: *> 
  101: *> \param[in] JOBU
  102: *> \verbatim
  103: *>          JOBU is CHARACTER*1
  104: *>         Specifies whether to compute the columns of U:
  105: *>       = 'U': N columns of U are returned in the array U.
  106: *>       = 'F': full set of M left sing. vectors is returned in the array U.
  107: *>       = 'W': U may be used as workspace of length M*N. See the description
  108: *>              of U.
  109: *>       = 'N': U is not computed.
  110: *> \endverbatim
  111: *> 
  112: *> \param[in] JOBV
  113: *> \verbatim
  114: *>          JOBV is CHARACTER*1
  115: *>         Specifies whether to compute the matrix V:
  116: *>       = 'V': N columns of V are returned in the array V; Jacobi rotations
  117: *>              are not explicitly accumulated.
  118: *>       = 'J': N columns of V are returned in the array V, but they are
  119: *>              computed as the product of Jacobi rotations. This option is
  120: *>              allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
  121: *>       = 'W': V may be used as workspace of length N*N. See the description
  122: *>              of V.
  123: *>       = 'N': V is not computed.
  124: *> \endverbatim
  125: *> 
  126: *> \param[in] JOBR
  127: *> \verbatim
  128: *>          JOBR is CHARACTER*1
  129: *>         Specifies the RANGE for the singular values. Issues the licence to
  130: *>         set to zero small positive singular values if they are outside
  131: *>         specified range. If A .NE. 0 is scaled so that the largest singular
  132: *>         value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
  133: *>         the licence to kill columns of A whose norm in c*A is less than
  134: *>         SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
  135: *>         where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
  136: *>       = 'N': Do not kill small columns of c*A. This option assumes that
  137: *>              BLAS and QR factorizations and triangular solvers are
  138: *>              implemented to work in that range. If the condition of A
  139: *>              is greater than BIG, use ZGESVJ.
  140: *>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
  141: *>              (roughly, as described above). This option is recommended.
  142: *>                                             ===========================
  143: *>         For computing the singular values in the FULL range [SFMIN,BIG]
  144: *>         use ZGESVJ.
  145: *> \endverbatim
  146: *> 
  147: *> \param[in] JOBT
  148: *> \verbatim
  149: *>          JOBT is CHARACTER*1
  150: *>         If the matrix is square then the procedure may determine to use
  151: *>         transposed A if A^* seems to be better with respect to convergence.
  152: *>         If the matrix is not square, JOBT is ignored. This is subject to
  153: *>         changes in the future.
  154: *>         The decision is based on two values of entropy over the adjoint
  155: *>         orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
  156: *>       = 'T': transpose if entropy test indicates possibly faster
  157: *>         convergence of Jacobi process if A^* is taken as input. If A is
  158: *>         replaced with A^*, then the row pivoting is included automatically.
  159: *>       = 'N': do not speculate.
  160: *>         This option can be used to compute only the singular values, or the
  161: *>         full SVD (U, SIGMA and V). For only one set of singular vectors
  162: *>         (U or V), the caller should provide both U and V, as one of the
  163: *>         matrices is used as workspace if the matrix A is transposed.
  164: *>         The implementer can easily remove this constraint and make the
  165: *>         code more complicated. See the descriptions of U and V.
  166: *> \endverbatim
  167: *> 
  168: *> \param[in] JOBP
  169: *> \verbatim
  170: *>          JOBP is CHARACTER*1
  171: *>         Issues the licence to introduce structured perturbations to drown
  172: *>         denormalized numbers. This licence should be active if the
  173: *>         denormals are poorly implemented, causing slow computation,
  174: *>         especially in cases of fast convergence (!). For details see [1,2].
  175: *>         For the sake of simplicity, this perturbations are included only
  176: *>         when the full SVD or only the singular values are requested. The
  177: *>         implementer/user can easily add the perturbation for the cases of
  178: *>         computing one set of singular vectors.
  179: *>       = 'P': introduce perturbation
  180: *>       = 'N': do not perturb
  181: *> \endverbatim
  182: *>
  183: *> \param[in] M
  184: *> \verbatim
  185: *>          M is INTEGER
  186: *>         The number of rows of the input matrix A.  M >= 0.
  187: *> \endverbatim
  188: *>
  189: *> \param[in] N
  190: *> \verbatim
  191: *>          N is INTEGER
  192: *>         The number of columns of the input matrix A. M >= N >= 0.
  193: *> \endverbatim
  194: *>
  195: *> \param[in,out] A
  196: *> \verbatim
  197: *>          A is COMPLEX*16 array, dimension (LDA,N)
  198: *>          On entry, the M-by-N matrix A.
  199: *> \endverbatim
  200: *>
  201: *> \param[in] LDA
  202: *> \verbatim
  203: *>          LDA is INTEGER
  204: *>          The leading dimension of the array A.  LDA >= max(1,M).
  205: *> \endverbatim
  206: *>
  207: *> \param[out] SVA
  208: *> \verbatim
  209: *>          SVA is DOUBLE PRECISION array, dimension (N)
  210: *>          On exit,
  211: *>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
  212: *>            computation SVA contains Euclidean column norms of the
  213: *>            iterated matrices in the array A.
  214: *>          - For WORK(1) .NE. WORK(2): The singular values of A are
  215: *>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
  216: *>            sigma_max(A) overflows or if small singular values have been
  217: *>            saved from underflow by scaling the input matrix A.
  218: *>          - If JOBR='R' then some of the singular values may be returned
  219: *>            as exact zeros obtained by "set to zero" because they are
  220: *>            below the numerical rank threshold or are denormalized numbers.
  221: *> \endverbatim
  222: *>
  223: *> \param[out] U
  224: *> \verbatim
  225: *>          U is COMPLEX*16 array, dimension ( LDU, N )
  226: *>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
  227: *>                         the left singular vectors.
  228: *>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
  229: *>                         the left singular vectors, including an ONB
  230: *>                         of the orthogonal complement of the Range(A).
  231: *>          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
  232: *>                         then U is used as workspace if the procedure
  233: *>                         replaces A with A^*. In that case, [V] is computed
  234: *>                         in U as left singular vectors of A^* and then
  235: *>                         copied back to the V array. This 'W' option is just
  236: *>                         a reminder to the caller that in this case U is
  237: *>                         reserved as workspace of length N*N.
  238: *>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
  239: *> \endverbatim
  240: *>
  241: *> \param[in] LDU
  242: *> \verbatim
  243: *>          LDU is INTEGER
  244: *>          The leading dimension of the array U,  LDU >= 1.
  245: *>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
  246: *> \endverbatim
  247: *>
  248: *> \param[out] V
  249: *> \verbatim
  250: *>          V is COMPLEX*16 array, dimension ( LDV, N )
  251: *>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
  252: *>                         the right singular vectors;
  253: *>          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
  254: *>                         then V is used as workspace if the pprocedure
  255: *>                         replaces A with A^*. In that case, [U] is computed
  256: *>                         in V as right singular vectors of A^* and then
  257: *>                         copied back to the U array. This 'W' option is just
  258: *>                         a reminder to the caller that in this case V is
  259: *>                         reserved as workspace of length N*N.
  260: *>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
  261: *> \endverbatim
  262: *>
  263: *> \param[in] LDV
  264: *> \verbatim
  265: *>          LDV is INTEGER
  266: *>          The leading dimension of the array V,  LDV >= 1.
  267: *>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
  268: *> \endverbatim
  269: *>
  270: *> \param[out] CWORK
  271: *> \verbatim
  272: *>          CWORK is COMPLEX*16 array, dimension at least LWORK.     
  273: *> \endverbatim
  274: *>
  275: *> \param[in] LWORK
  276: *> \verbatim
  277: *>          LWORK is INTEGER
  278: *>          Length of CWORK to confirm proper allocation of workspace.
  279: *>          LWORK depends on the job:
  280: *>
  281: *>          1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
  282: *>            1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
  283: *>               LWORK >= 2*N+1. This is the minimal requirement.
  284: *>               ->> For optimal performance (blocked code) the optimal value
  285: *>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
  286: *>               block size for ZGEQP3 and ZGEQRF.
  287: *>               In general, optimal LWORK is computed as 
  288: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF)).        
  289: *>            1.2. .. an estimate of the scaled condition number of A is
  290: *>               required (JOBA='E', or 'G'). In this case, LWORK the minimal
  291: *>               requirement is LWORK >= N*N + 3*N.
  292: *>               ->> For optimal performance (blocked code) the optimal value 
  293: *>               is LWORK >= max(N+(N+1)*NB, N*N+3*N).
  294: *>               In general, the optimal length LWORK is computed as
  295: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), 
  296: *>                                                     N+N*N+LWORK(ZPOCON)).
  297: *>
  298: *>          2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
  299: *>             (JOBU.EQ.'N')
  300: *>            -> the minimal requirement is LWORK >= 3*N.
  301: *>            -> For optimal performance, LWORK >= max(N+(N+1)*NB, 3*N,2*N+N*NB),
  302: *>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF,
  303: *>               ZUNMLQ. In general, the optimal length LWORK is computed as
  304: *>               LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZPOCON), N+LWORK(ZGESVJ),
  305: *>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)).
  306: *>
  307: *>          3. If SIGMA and the left singular vectors are needed
  308: *>            -> the minimal requirement is LWORK >= 3*N.
  309: *>            -> For optimal performance:
  310: *>               if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB),
  311: *>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR.
  312: *>               In general, the optimal length LWORK is computed as
  313: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON),
  314: *>                        2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). 
  315: *>               
  316: *>          4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and 
  317: *>            4.1. if JOBV.EQ.'V'  
  318: *>               the minimal requirement is LWORK >= 5*N+2*N*N. 
  319: *>            4.2. if JOBV.EQ.'J' the minimal requirement is 
  320: *>               LWORK >= 4*N+N*N.
  321: *>            In both cases, the allocated CWORK can accommodate blocked runs
  322: *>            of ZGEQP3, ZGEQRF, ZGELQF, ZUNMQR, ZUNMLQ.
  323: *> \endverbatim
  324: *>
  325: *> \param[out] RWORK
  326: *> \verbatim
  327: *>          RWORK is DOUBLE PRECISION array, dimension at least LRWORK.
  328: *>          On exit,
  329: *>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
  330: *>                    such that SCALE*SVA(1:N) are the computed singular values
  331: *>                    of A. (See the description of SVA().)
  332: *>          RWORK(2) = See the description of RWORK(1).
  333: *>          RWORK(3) = SCONDA is an estimate for the condition number of
  334: *>                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
  335: *>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
  336: *>                    It is computed using SPOCON. It holds
  337: *>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
  338: *>                    where R is the triangular factor from the QRF of A.
  339: *>                    However, if R is truncated and the numerical rank is
  340: *>                    determined to be strictly smaller than N, SCONDA is
  341: *>                    returned as -1, thus indicating that the smallest
  342: *>                    singular values might be lost.
  343: *>
  344: *>          If full SVD is needed, the following two condition numbers are
  345: *>          useful for the analysis of the algorithm. They are provied for
  346: *>          a developer/implementer who is familiar with the details of
  347: *>          the method.
  348: *>
  349: *>          RWORK(4) = an estimate of the scaled condition number of the
  350: *>                    triangular factor in the first QR factorization.
  351: *>          RWORK(5) = an estimate of the scaled condition number of the
  352: *>                    triangular factor in the second QR factorization.
  353: *>          The following two parameters are computed if JOBT .EQ. 'T'.
  354: *>          They are provided for a developer/implementer who is familiar
  355: *>          with the details of the method.
  356: *>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
  357: *>                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
  358: *>                    probability simplex.
  359: *>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
  360: *> \endverbatim
  361: *>
  362: *> \param[in] LRWORK
  363: *> \verbatim
  364: *>          LRWORK is INTEGER
  365: *>          Length of RWORK to confirm proper allocation of workspace.
  366: *>          LRWORK depends on the job:
  367: *>
  368: *>       1. If only singular values are requested i.e. if 
  369: *>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') 
  370: *>          then:
  371: *>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  372: *>          then LRWORK = max( 7, N + 2 * M ). 
  373: *>          1.2. Otherwise, LRWORK  = max( 7, 2 * N ).
  374: *>       2. If singular values with the right singular vectors are requested
  375: *>          i.e. if 
  376: *>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. 
  377: *>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
  378: *>          then:
  379: *>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  380: *>          then LRWORK = max( 7, N + 2 * M ). 
  381: *>          2.2. Otherwise, LRWORK  = max( 7, 2 * N ).      
  382: *>       3. If singular values with the left singular vectors are requested, i.e. if    
  383: *>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
  384: *>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
  385: *>          then:
  386: *>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  387: *>          then LRWORK = max( 7, N + 2 * M ). 
  388: *>          3.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
  389: *>       4. If singular values with both the left and the right singular vectors 
  390: *>          are requested, i.e. if     
  391: *>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
  392: *>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
  393: *>          then:
  394: *>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  395: *>          then LRWORK = max( 7, N + 2 * M ). 
  396: *>          4.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
  397: *> \endverbatim
  398: *>          
  399: *> \param[out] IWORK
  400: *> \verbatim
  401: *>          IWORK is INTEGER array, of dimension:
  402: *>                If LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), then 
  403: *>                the dimension of IWORK is max( 3, 2 * N + M ).
  404: *>                Otherwise, the dimension of IWORK is 
  405: *>                -> max( 3, 2*N ) for full SVD
  406: *>                -> max( 3, N ) for singular values only or singular
  407: *>                   values with one set of singular vectors (left or right)
  408: *>          On exit,
  409: *>          IWORK(1) = the numerical rank determined after the initial
  410: *>                     QR factorization with pivoting. See the descriptions
  411: *>                     of JOBA and JOBR.
  412: *>          IWORK(2) = the number of the computed nonzero singular values
  413: *>          IWORK(3) = if nonzero, a warning message:
  414: *>                     If IWORK(3).EQ.1 then some of the column norms of A
  415: *>                     were denormalized floats. The requested high accuracy
  416: *>                     is not warranted by the data.
  417: *> \endverbatim
  418: *>
  419: *> \param[out] INFO
  420: *> \verbatim
  421: *>          INFO is INTEGER
  422: *>           < 0  : if INFO = -i, then the i-th argument had an illegal value.
  423: *>           = 0 :  successfull exit;
  424: *>           > 0 :  ZGEJSV  did not converge in the maximal allowed number
  425: *>                  of sweeps. The computed values may be inaccurate.
  426: *> \endverbatim
  427: *
  428: *  Authors:
  429: *  ========
  430: *
  431: *> \author Univ. of Tennessee 
  432: *> \author Univ. of California Berkeley 
  433: *> \author Univ. of Colorado Denver 
  434: *> \author NAG Ltd. 
  435: *
  436: *> \date June 2016
  437: *
  438: *> \ingroup complex16GEsing
  439: *
  440: *> \par Further Details:
  441: *  =====================
  442: *>
  443: *> \verbatim
  444: *>
  445: *>  ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3,
  446: *>  ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an
  447: *>  additional row pivoting can be used as a preprocessor, which in some
  448: *>  cases results in much higher accuracy. An example is matrix A with the
  449: *>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
  450: *>  diagonal matrices and C is well-conditioned matrix. In that case, complete
  451: *>  pivoting in the first QR factorizations provides accuracy dependent on the
  452: *>  condition number of C, and independent of D1, D2. Such higher accuracy is
  453: *>  not completely understood theoretically, but it works well in practice.
  454: *>  Further, if A can be written as A = B*D, with well-conditioned B and some
  455: *>  diagonal D, then the high accuracy is guaranteed, both theoretically and
  456: *>  in software, independent of D. For more details see [1], [2].
  457: *>     The computational range for the singular values can be the full range
  458: *>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
  459: *>  & LAPACK routines called by ZGEJSV are implemented to work in that range.
  460: *>  If that is not the case, then the restriction for safe computation with
  461: *>  the singular values in the range of normalized IEEE numbers is that the
  462: *>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
  463: *>  overflow. This code (ZGEJSV) is best used in this restricted range,
  464: *>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
  465: *>  returned as zeros. See JOBR for details on this.
  466: *>     Further, this implementation is somewhat slower than the one described
  467: *>  in [1,2] due to replacement of some non-LAPACK components, and because
  468: *>  the choice of some tuning parameters in the iterative part (ZGESVJ) is
  469: *>  left to the implementer on a particular machine.
  470: *>     The rank revealing QR factorization (in this code: ZGEQP3) should be
  471: *>  implemented as in [3]. We have a new version of ZGEQP3 under development
  472: *>  that is more robust than the current one in LAPACK, with a cleaner cut in
  473: *>  rank defficient cases. It will be available in the SIGMA library [4].
  474: *>  If M is much larger than N, it is obvious that the inital QRF with
  475: *>  column pivoting can be preprocessed by the QRF without pivoting. That
  476: *>  well known trick is not used in ZGEJSV because in some cases heavy row
  477: *>  weighting can be treated with complete pivoting. The overhead in cases
  478: *>  M much larger than N is then only due to pivoting, but the benefits in
  479: *>  terms of accuracy have prevailed. The implementer/user can incorporate
  480: *>  this extra QRF step easily. The implementer can also improve data movement
  481: *>  (matrix transpose, matrix copy, matrix transposed copy) - this
  482: *>  implementation of ZGEJSV uses only the simplest, naive data movement.
  483: *
  484: *> \par Contributors:
  485: *  ==================
  486: *>
  487: *>  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
  488: *
  489: *> \par References:
  490: *  ================
  491: *>
  492: *> \verbatim
  493: *>
  494: *> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
  495: *>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
  496: *>     LAPACK Working note 169.
  497: *> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
  498: *>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
  499: *>     LAPACK Working note 170.
  500: *> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
  501: *>     factorization software - a case study.
  502: *>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
  503: *>     LAPACK Working note 176.
  504: *> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
  505: *>     QSVD, (H,K)-SVD computations.
  506: *>     Department of Mathematics, University of Zagreb, 2008.
  507: *> \endverbatim
  508: *
  509: *>  \par Bugs, examples and comments:
  510: *   =================================
  511: *>
  512: *>  Please report all bugs and send interesting examples and/or comments to
  513: *>  drmac@math.hr. Thank you.
  514: *>
  515: *  =====================================================================
  516:       SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
  517:      $                   M, N, A, LDA, SVA, U, LDU, V, LDV,
  518:      $                   CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
  519: *
  520: *  -- LAPACK computational routine (version 3.6.1) --
  521: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  522: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  523: *     June 2016
  524: *
  525: *     .. Scalar Arguments ..
  526:       IMPLICIT    NONE
  527:       INTEGER     INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
  528: *     ..
  529: *     .. Array Arguments ..
  530:       COMPLEX*16       A( LDA, * ), U( LDU, * ), V( LDV, * ), 
  531:      $                 CWORK( LWORK )
  532:       DOUBLE PRECISION SVA( N ), RWORK( * )
  533:       INTEGER          IWORK( * )
  534:       CHARACTER*1      JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
  535: *     ..
  536: *
  537: *  ===========================================================================
  538: *
  539: *     .. Local Parameters ..
  540:       DOUBLE PRECISION ZERO,         ONE
  541:       PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
  542:       COMPLEX*16                CZERO,       CONE
  543:       PARAMETER  ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) )
  544: *     ..
  545: *     .. Local Scalars ..
  546:       COMPLEX*16       CTEMP
  547:       DOUBLE PRECISION AAPP,    AAQQ,   AATMAX, AATMIN, BIG,    BIG1,   
  548:      $                 COND_OK, CONDR1, CONDR2, ENTRA,  ENTRAT, EPSLN,  
  549:      $                 MAXPRJ,  SCALEM, SCONDA, SFMIN,  SMALL,  TEMP1,  
  550:      $                 USCAL1,  USCAL2, XSC
  551:       INTEGER IERR,   N1,     NR,     NUMRANK,        p, q,   WARNING
  552:       LOGICAL ALMORT, DEFR,   ERREST, GOSCAL, JRACC,  KILL,   LSVEC,
  553:      $        L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
  554:      $        NOSCAL, ROWPIV, RSVEC,  TRANSP
  555: *     ..
  556: *     .. Intrinsic Functions ..
  557:       INTRINSIC ABS,  DCMPLX, DCONJG, DLOG, DMAX1, DMIN1, DBLE,
  558:      $          MAX0, MIN0, NINT,  DSQRT
  559: *     ..
  560: *     .. External Functions ..
  561:       DOUBLE PRECISION      DLAMCH, DZNRM2
  562:       INTEGER   IDAMAX, IZAMAX
  563:       LOGICAL   LSAME
  564:       EXTERNAL  IDAMAX, IZAMAX, LSAME, DLAMCH, DZNRM2
  565: *     ..
  566: *     .. External Subroutines ..
  567:       EXTERNAL  DLASSQ, ZCOPY,  ZGELQF, ZGEQP3, ZGEQRF, ZLACPY, ZLASCL,
  568:      $          DLASCL, ZLASET, ZLASSQ, ZLASWP, ZUNGQR, ZUNMLQ,
  569:      $          ZUNMQR, ZPOCON, DSCAL, ZDSCAL, ZSWAP,  ZTRSM,  XERBLA
  570: *
  571:       EXTERNAL  ZGESVJ
  572: *     ..
  573: *
  574: *     Test the input arguments
  575: *
  576: 
  577:       LSVEC  = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' )
  578:       JRACC  = LSAME( JOBV, 'J' )
  579:       RSVEC  = LSAME( JOBV, 'V' ) .OR. JRACC
  580:       ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' )
  581:       L2RANK = LSAME( JOBA, 'R' )
  582:       L2ABER = LSAME( JOBA, 'A' )
  583:       ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' )
  584:       L2TRAN = LSAME( JOBT, 'T' )
  585:       L2KILL = LSAME( JOBR, 'R' )
  586:       DEFR   = LSAME( JOBR, 'N' )
  587:       L2PERT = LSAME( JOBP, 'P' )
  588: *
  589:       IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR.
  590:      $     ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
  591:          INFO = - 1
  592:       ELSE IF ( .NOT.( LSVEC  .OR. LSAME( JOBU, 'N' ) .OR.
  593:      $                             LSAME( JOBU, 'W' )) ) THEN
  594:          INFO = - 2
  595:       ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR.
  596:      $   LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
  597:          INFO = - 3
  598:       ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) )    THEN
  599:          INFO = - 4
  600:       ELSE IF ( .NOT. ( L2TRAN .OR. LSAME( JOBT, 'N' ) ) ) THEN
  601:          INFO = - 5
  602:       ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN
  603:          INFO = - 6
  604:       ELSE IF ( M .LT. 0 ) THEN
  605:          INFO = - 7
  606:       ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN
  607:          INFO = - 8
  608:       ELSE IF ( LDA .LT. M ) THEN
  609:          INFO = - 10
  610:       ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN
  611:          INFO = - 13
  612:       ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN
  613:          INFO = - 15
  614:       ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND.
  615:      $                           (LWORK .LT. 2*N+1)) .OR.
  616:      $ (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND.
  617:      $                         (LWORK .LT. N*N+3*N)) .OR.
  618:      $ (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. 3*N))
  619:      $ .OR.
  620:      $ (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. 3*N))
  621:      $ .OR.
  622:      $ (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND. 
  623:      $                          (LWORK.LT.5*N+2*N*N))
  624:      $ .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND.
  625:      $                          LWORK.LT.4*N+N*N))
  626:      $   THEN
  627:          INFO = - 17
  628:       ELSE IF ( LRWORK.LT. MAX0(N+2*M,7)) THEN
  629:          INFO = -19 
  630:       ELSE
  631: *        #:)
  632:          INFO = 0
  633:       END IF
  634: *
  635:       IF ( INFO .NE. 0 ) THEN
  636: *       #:(
  637:          CALL XERBLA( 'ZGEJSV', - INFO )
  638:          RETURN
  639:       END IF
  640: *
  641: *     Quick return for void matrix (Y3K safe)
  642: * #:)
  643:       IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN
  644:          IWORK(1:3) = 0
  645:          RWORK(1:7) = 0
  646:          RETURN
  647:       ENDIF
  648: *
  649: *     Determine whether the matrix U should be M x N or M x M
  650: *
  651:       IF ( LSVEC ) THEN
  652:          N1 = N
  653:          IF ( LSAME( JOBU, 'F' ) ) N1 = M
  654:       END IF
  655: *
  656: *     Set numerical parameters
  657: *
  658: *!    NOTE: Make sure DLAMCH() does not fail on the target architecture.
  659: *
  660:       EPSLN = DLAMCH('Epsilon')
  661:       SFMIN = DLAMCH('SafeMinimum')
  662:       SMALL = SFMIN / EPSLN
  663:       BIG   = DLAMCH('O')
  664: *     BIG   = ONE / SFMIN
  665: *
  666: *     Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
  667: *
  668: *(!)  If necessary, scale SVA() to protect the largest norm from
  669: *     overflow. It is possible that this scaling pushes the smallest
  670: *     column norm left from the underflow threshold (extreme case).
  671: *
  672:       SCALEM  = ONE / DSQRT(DBLE(M)*DBLE(N))
  673:       NOSCAL  = .TRUE.
  674:       GOSCAL  = .TRUE.
  675:       DO 1874 p = 1, N
  676:          AAPP = ZERO
  677:          AAQQ = ONE
  678:          CALL ZLASSQ( M, A(1,p), 1, AAPP, AAQQ )
  679:          IF ( AAPP .GT. BIG ) THEN
  680:             INFO = - 9
  681:             CALL XERBLA( 'ZGEJSV', -INFO )
  682:             RETURN
  683:          END IF
  684:          AAQQ = DSQRT(AAQQ)
  685:          IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL  ) THEN
  686:             SVA(p)  = AAPP * AAQQ
  687:          ELSE
  688:             NOSCAL  = .FALSE.
  689:             SVA(p)  = AAPP * ( AAQQ * SCALEM )
  690:             IF ( GOSCAL ) THEN
  691:                GOSCAL = .FALSE.
  692:                CALL DSCAL( p-1, SCALEM, SVA, 1 )
  693:             END IF
  694:          END IF
  695:  1874 CONTINUE
  696: *
  697:       IF ( NOSCAL ) SCALEM = ONE
  698: *
  699:       AAPP = ZERO
  700:       AAQQ = BIG
  701:       DO 4781 p = 1, N
  702:          AAPP = DMAX1( AAPP, SVA(p) )
  703:          IF ( SVA(p) .NE. ZERO ) AAQQ = DMIN1( AAQQ, SVA(p) )
  704:  4781 CONTINUE
  705: *
  706: *     Quick return for zero M x N matrix
  707: * #:)
  708:       IF ( AAPP .EQ. ZERO ) THEN
  709:          IF ( LSVEC ) CALL ZLASET( 'G', M, N1, CZERO, CONE, U, LDU )
  710:          IF ( RSVEC ) CALL ZLASET( 'G', N, N,  CZERO, CONE, V, LDV )
  711:          RWORK(1) = ONE
  712:          RWORK(2) = ONE
  713:          IF ( ERREST ) RWORK(3) = ONE
  714:          IF ( LSVEC .AND. RSVEC ) THEN
  715:             RWORK(4) = ONE
  716:             RWORK(5) = ONE
  717:          END IF
  718:          IF ( L2TRAN ) THEN
  719:             RWORK(6) = ZERO
  720:             RWORK(7) = ZERO
  721:          END IF
  722:          IWORK(1) = 0
  723:          IWORK(2) = 0
  724:          IWORK(3) = 0
  725:          RETURN
  726:       END IF
  727: *
  728: *     Issue warning if denormalized column norms detected. Override the
  729: *     high relative accuracy request. Issue licence to kill columns
  730: *     (set them to zero) whose norm is less than sigma_max / BIG (roughly).
  731: * #:(
  732:       WARNING = 0
  733:       IF ( AAQQ .LE. SFMIN ) THEN
  734:          L2RANK = .TRUE.
  735:          L2KILL = .TRUE.
  736:          WARNING = 1
  737:       END IF
  738: *
  739: *     Quick return for one-column matrix
  740: * #:)
  741:       IF ( N .EQ. 1 ) THEN
  742: *
  743:          IF ( LSVEC ) THEN
  744:             CALL ZLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR )
  745:             CALL ZLACPY( 'A', M, 1, A, LDA, U, LDU )
  746: *           computing all M left singular vectors of the M x 1 matrix
  747:             IF ( N1 .NE. N  ) THEN
  748:               CALL ZGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR )
  749:               CALL ZUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR )
  750:               CALL ZCOPY( M, A(1,1), 1, U(1,1), 1 )
  751:             END IF
  752:          END IF
  753:          IF ( RSVEC ) THEN
  754:              V(1,1) = CONE
  755:          END IF
  756:          IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN
  757:             SVA(1)  = SVA(1) / SCALEM
  758:             SCALEM  = ONE
  759:          END IF
  760:          RWORK(1) = ONE / SCALEM
  761:          RWORK(2) = ONE
  762:          IF ( SVA(1) .NE. ZERO ) THEN
  763:             IWORK(1) = 1
  764:             IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN
  765:                IWORK(2) = 1
  766:             ELSE
  767:                IWORK(2) = 0
  768:             END IF
  769:          ELSE
  770:             IWORK(1) = 0
  771:             IWORK(2) = 0
  772:          END IF
  773:          IWORK(3) = 0 
  774:          IF ( ERREST ) RWORK(3) = ONE
  775:          IF ( LSVEC .AND. RSVEC ) THEN
  776:             RWORK(4) = ONE
  777:             RWORK(5) = ONE
  778:          END IF
  779:          IF ( L2TRAN ) THEN
  780:             RWORK(6) = ZERO
  781:             RWORK(7) = ZERO
  782:          END IF
  783:          RETURN
  784: *
  785:       END IF
  786: *
  787:       TRANSP = .FALSE.
  788:       L2TRAN = L2TRAN .AND. ( M .EQ. N )
  789: *
  790:       AATMAX = -ONE
  791:       AATMIN =  BIG
  792:       IF ( ROWPIV .OR. L2TRAN ) THEN
  793: *
  794: *     Compute the row norms, needed to determine row pivoting sequence
  795: *     (in the case of heavily row weighted A, row pivoting is strongly
  796: *     advised) and to collect information needed to compare the
  797: *     structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
  798: *
  799:          IF ( L2TRAN ) THEN
  800:             DO 1950 p = 1, M
  801:                XSC   = ZERO
  802:                TEMP1 = ONE
  803:                CALL ZLASSQ( N, A(p,1), LDA, XSC, TEMP1 )
  804: *              ZLASSQ gets both the ell_2 and the ell_infinity norm
  805: *              in one pass through the vector
  806:                RWORK(M+N+p)  = XSC * SCALEM
  807:                RWORK(N+p)    = XSC * (SCALEM*DSQRT(TEMP1))
  808:                AATMAX = DMAX1( AATMAX, RWORK(N+p) )
  809:                IF (RWORK(N+p) .NE. ZERO) 
  810:      $            AATMIN = DMIN1(AATMIN,RWORK(N+p))
  811:  1950       CONTINUE
  812:          ELSE
  813:             DO 1904 p = 1, M
  814:                RWORK(M+N+p) = SCALEM*ABS( A(p,IZAMAX(N,A(p,1),LDA)) )
  815:                AATMAX = DMAX1( AATMAX, RWORK(M+N+p) )
  816:                AATMIN = DMIN1( AATMIN, RWORK(M+N+p) )
  817:  1904       CONTINUE
  818:          END IF
  819: *
  820:       END IF
  821: *
  822: *     For square matrix A try to determine whether A^*  would be  better
  823: *     input for the preconditioned Jacobi SVD, with faster convergence.
  824: *     The decision is based on an O(N) function of the vector of column
  825: *     and row norms of A, based on the Shannon entropy. This should give
  826: *     the right choice in most cases when the difference actually matters.
  827: *     It may fail and pick the slower converging side.
  828: *
  829:       ENTRA  = ZERO
  830:       ENTRAT = ZERO
  831:       IF ( L2TRAN ) THEN
  832: *
  833:          XSC   = ZERO
  834:          TEMP1 = ONE
  835:          CALL DLASSQ( N, SVA, 1, XSC, TEMP1 )
  836:          TEMP1 = ONE / TEMP1
  837: *
  838:          ENTRA = ZERO
  839:          DO 1113 p = 1, N
  840:             BIG1  = ( ( SVA(p) / XSC )**2 ) * TEMP1
  841:             IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1)
  842:  1113    CONTINUE
  843:          ENTRA = - ENTRA / DLOG(DBLE(N))
  844: *
  845: *        Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
  846: *        It is derived from the diagonal of  A^* * A.  Do the same with the
  847: *        diagonal of A * A^*, compute the entropy of the corresponding
  848: *        probability distribution. Note that A * A^* and A^* * A have the
  849: *        same trace.
  850: *
  851:          ENTRAT = ZERO
  852:          DO 1114 p = N+1, N+M
  853:             BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1
  854:             IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1)
  855:  1114    CONTINUE
  856:          ENTRAT = - ENTRAT / DLOG(DBLE(M))
  857: *
  858: *        Analyze the entropies and decide A or A^*. Smaller entropy
  859: *        usually means better input for the algorithm.
  860: *
  861:          TRANSP = ( ENTRAT .LT. ENTRA )
  862:          TRANSP = .TRUE.
  863: *
  864: *        If A^* is better than A, take the adjoint of A.
  865: *
  866:          IF ( TRANSP ) THEN
  867: *           In an optimal implementation, this trivial transpose
  868: *           should be replaced with faster transpose.
  869:             DO 1115 p = 1, N - 1
  870:                A(p,p) = DCONJG(A(p,p)) 
  871:                DO 1116 q = p + 1, N
  872:                    CTEMP = DCONJG(A(q,p))
  873:                   A(q,p) = DCONJG(A(p,q))
  874:                   A(p,q) = CTEMP
  875:  1116          CONTINUE
  876:  1115       CONTINUE
  877:             A(N,N) = DCONJG(A(N,N))
  878:             DO 1117 p = 1, N
  879:                RWORK(M+N+p) = SVA(p)
  880:                SVA(p)      = RWORK(N+p)
  881: *              previously computed row 2-norms are now column 2-norms 
  882: *              of the transposed matrix               
  883:  1117       CONTINUE
  884:             TEMP1  = AAPP
  885:             AAPP   = AATMAX
  886:             AATMAX = TEMP1
  887:             TEMP1  = AAQQ
  888:             AAQQ   = AATMIN
  889:             AATMIN = TEMP1
  890:             KILL   = LSVEC
  891:             LSVEC  = RSVEC
  892:             RSVEC  = KILL
  893:             IF ( LSVEC ) N1 = N 
  894: *
  895:             ROWPIV = .TRUE.
  896:          END IF
  897: *
  898:       END IF
  899: *     END IF L2TRAN
  900: *
  901: *     Scale the matrix so that its maximal singular value remains less
  902: *     than SQRT(BIG) -- the matrix is scaled so that its maximal column
  903: *     has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
  904: *     SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
  905: *     BLAS routines that, in some implementations, are not capable of
  906: *     working in the full interval [SFMIN,BIG] and that they may provoke
  907: *     overflows in the intermediate results. If the singular values spread
  908: *     from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
  909: *     one should use ZGESVJ instead of ZGEJSV.
  910: *
  911:       BIG1   = DSQRT( BIG )
  912:       TEMP1  = DSQRT( BIG / DBLE(N) )
  913: *
  914:       CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR )
  915:       IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN
  916:           AAQQ = ( AAQQ / AAPP ) * TEMP1
  917:       ELSE
  918:           AAQQ = ( AAQQ * TEMP1 ) / AAPP
  919:       END IF
  920:       TEMP1 = TEMP1 * SCALEM
  921:       CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR )
  922: *
  923: *     To undo scaling at the end of this procedure, multiply the
  924: *     computed singular values with USCAL2 / USCAL1.
  925: *
  926:       USCAL1 = TEMP1
  927:       USCAL2 = AAPP
  928: *
  929:       IF ( L2KILL ) THEN
  930: *        L2KILL enforces computation of nonzero singular values in
  931: *        the restricted range of condition number of the initial A,
  932: *        sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
  933:          XSC = DSQRT( SFMIN )
  934:       ELSE
  935:          XSC = SMALL
  936: *
  937: *        Now, if the condition number of A is too big,
  938: *        sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
  939: *        as a precaution measure, the full SVD is computed using ZGESVJ
  940: *        with accumulated Jacobi rotations. This provides numerically
  941: *        more robust computation, at the cost of slightly increased run
  942: *        time. Depending on the concrete implementation of BLAS and LAPACK
  943: *        (i.e. how they behave in presence of extreme ill-conditioning) the
  944: *        implementor may decide to remove this switch.
  945:          IF ( ( AAQQ.LT.DSQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN
  946:             JRACC = .TRUE.
  947:          END IF
  948: *
  949:       END IF
  950:       IF ( AAQQ .LT. XSC ) THEN
  951:          DO 700 p = 1, N
  952:             IF ( SVA(p) .LT. XSC ) THEN
  953:                CALL ZLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA )
  954:                SVA(p) = ZERO
  955:             END IF
  956:  700     CONTINUE
  957:       END IF
  958: *
  959: *     Preconditioning using QR factorization with pivoting
  960: *
  961:       IF ( ROWPIV ) THEN
  962: *        Optional row permutation (Bjoerck row pivoting):
  963: *        A result by Cox and Higham shows that the Bjoerck's
  964: *        row pivoting combined with standard column pivoting
  965: *        has similar effect as Powell-Reid complete pivoting.
  966: *        The ell-infinity norms of A are made nonincreasing.
  967:          DO 1952 p = 1, M - 1
  968:             q = IDAMAX( M-p+1, RWORK(M+N+p), 1 ) + p - 1
  969:             IWORK(2*N+p) = q
  970:             IF ( p .NE. q ) THEN
  971:                TEMP1        = RWORK(M+N+p)
  972:                RWORK(M+N+p) = RWORK(M+N+q)
  973:                RWORK(M+N+q) = TEMP1
  974:             END IF
  975:  1952    CONTINUE
  976:          CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(2*N+1), 1 )
  977:       END IF
  978: 
  979: *
  980: *     End of the preparation phase (scaling, optional sorting and
  981: *     transposing, optional flushing of small columns).
  982: *
  983: *     Preconditioning
  984: *
  985: *     If the full SVD is needed, the right singular vectors are computed
  986: *     from a matrix equation, and for that we need theoretical analysis
  987: *     of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
  988: *     In all other cases the first RR QRF can be chosen by other criteria
  989: *     (eg speed by replacing global with restricted window pivoting, such
  990: *     as in xGEQPX from TOMS # 782). Good results will be obtained using
  991: *     xGEQPX with properly (!) chosen numerical parameters.
  992: *     Any improvement of ZGEQP3 improves overal performance of ZGEJSV.
  993: *
  994: *     A * P1 = Q1 * [ R1^* 0]^*:
  995:       DO 1963 p = 1, N
  996: *        .. all columns are free columns
  997:          IWORK(p) = 0
  998:  1963 CONTINUE
  999:       CALL ZGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, 
 1000:      $             RWORK, IERR )
 1001: *
 1002: *     The upper triangular matrix R1 from the first QRF is inspected for
 1003: *     rank deficiency and possibilities for deflation, or possible
 1004: *     ill-conditioning. Depending on the user specified flag L2RANK,
 1005: *     the procedure explores possibilities to reduce the numerical
 1006: *     rank by inspecting the computed upper triangular factor. If
 1007: *     L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
 1008: *     A + dA, where ||dA|| <= f(M,N)*EPSLN.
 1009: *
 1010:       NR = 1
 1011:       IF ( L2ABER ) THEN
 1012: *        Standard absolute error bound suffices. All sigma_i with
 1013: *        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
 1014: *        agressive enforcement of lower numerical rank by introducing a
 1015: *        backward error of the order of N*EPSLN*||A||.
 1016:          TEMP1 = DSQRT(DBLE(N))*EPSLN
 1017:          DO 3001 p = 2, N
 1018:             IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN
 1019:                NR = NR + 1
 1020:             ELSE
 1021:                GO TO 3002
 1022:             END IF
 1023:  3001    CONTINUE
 1024:  3002    CONTINUE
 1025:       ELSE IF ( L2RANK ) THEN
 1026: *        .. similarly as above, only slightly more gentle (less agressive).
 1027: *        Sudden drop on the diagonal of R1 is used as the criterion for
 1028: *        close-to-rank-defficient.
 1029:          TEMP1 = DSQRT(SFMIN)
 1030:          DO 3401 p = 2, N
 1031:             IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR.
 1032:      $           ( ABS(A(p,p)) .LT. SMALL ) .OR.
 1033:      $           ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
 1034:             NR = NR + 1
 1035:  3401    CONTINUE
 1036:  3402    CONTINUE
 1037: *
 1038:       ELSE
 1039: *        The goal is high relative accuracy. However, if the matrix
 1040: *        has high scaled condition number the relative accuracy is in
 1041: *        general not feasible. Later on, a condition number estimator
 1042: *        will be deployed to estimate the scaled condition number.
 1043: *        Here we just remove the underflowed part of the triangular
 1044: *        factor. This prevents the situation in which the code is
 1045: *        working hard to get the accuracy not warranted by the data.
 1046:          TEMP1  = DSQRT(SFMIN)
 1047:          DO 3301 p = 2, N
 1048:             IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR.
 1049:      $           ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
 1050:             NR = NR + 1
 1051:  3301    CONTINUE
 1052:  3302    CONTINUE
 1053: *
 1054:       END IF
 1055: *
 1056:       ALMORT = .FALSE.
 1057:       IF ( NR .EQ. N ) THEN
 1058:          MAXPRJ = ONE
 1059:          DO 3051 p = 2, N
 1060:             TEMP1  = ABS(A(p,p)) / SVA(IWORK(p))
 1061:             MAXPRJ = DMIN1( MAXPRJ, TEMP1 )
 1062:  3051    CONTINUE
 1063:          IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE.
 1064:       END IF
 1065: *
 1066: *
 1067:       SCONDA = - ONE
 1068:       CONDR1 = - ONE
 1069:       CONDR2 = - ONE
 1070: *
 1071:       IF ( ERREST ) THEN
 1072:          IF ( N .EQ. NR ) THEN
 1073:             IF ( RSVEC ) THEN
 1074: *              .. V is available as workspace
 1075:                CALL ZLACPY( 'U', N, N, A, LDA, V, LDV )
 1076:                DO 3053 p = 1, N
 1077:                   TEMP1 = SVA(IWORK(p))
 1078:                   CALL ZDSCAL( p, ONE/TEMP1, V(1,p), 1 )
 1079:  3053          CONTINUE
 1080:                CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1,
 1081:      $              CWORK(N+1), RWORK, IERR )
 1082: *          
 1083:             ELSE IF ( LSVEC ) THEN
 1084: *              .. U is available as workspace
 1085:                CALL ZLACPY( 'U', N, N, A, LDA, U, LDU )
 1086:                DO 3054 p = 1, N
 1087:                   TEMP1 = SVA(IWORK(p))
 1088:                   CALL ZDSCAL( p, ONE/TEMP1, U(1,p), 1 )
 1089:  3054          CONTINUE
 1090:                CALL ZPOCON( 'U', N, U, LDU, ONE, TEMP1,
 1091:      $              CWORK(N+1), RWORK, IERR )
 1092:             ELSE
 1093:                CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
 1094:                DO 3052 p = 1, N
 1095:                   TEMP1 = SVA(IWORK(p))
 1096:                   CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
 1097:  3052          CONTINUE
 1098: *           .. the columns of R are scaled to have unit Euclidean lengths.
 1099:                CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
 1100:      $              CWORK(N+N*N+1), RWORK, IERR )
 1101: *              
 1102:             END IF
 1103:             SCONDA = ONE / DSQRT(TEMP1)
 1104: *           SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
 1105: *           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
 1106:          ELSE
 1107:             SCONDA = - ONE
 1108:          END IF
 1109:       END IF
 1110: *
 1111:       L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. DSQRT(BIG1) )
 1112: *     If there is no violent scaling, artificial perturbation is not needed.
 1113: *
 1114: *     Phase 3:
 1115: *
 1116:       IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
 1117: *
 1118: *         Singular Values only
 1119: *
 1120: *         .. transpose A(1:NR,1:N)
 1121:          DO 1946 p = 1, MIN0( N-1, NR )
 1122:             CALL ZCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 )
 1123:             CALL ZLACGV( N-p+1, A(p,p), 1 )
 1124:  1946    CONTINUE
 1125:          IF ( NR .EQ. N ) A(N,N) = DCONJG(A(N,N))        
 1126: *
 1127: *        The following two DO-loops introduce small relative perturbation
 1128: *        into the strict upper triangle of the lower triangular matrix.
 1129: *        Small entries below the main diagonal are also changed.
 1130: *        This modification is useful if the computing environment does not
 1131: *        provide/allow FLUSH TO ZERO underflow, for it prevents many
 1132: *        annoying denormalized numbers in case of strongly scaled matrices.
 1133: *        The perturbation is structured so that it does not introduce any
 1134: *        new perturbation of the singular values, and it does not destroy
 1135: *        the job done by the preconditioner.
 1136: *        The licence for this perturbation is in the variable L2PERT, which
 1137: *        should be .FALSE. if FLUSH TO ZERO underflow is active.
 1138: *
 1139:          IF ( .NOT. ALMORT ) THEN
 1140: *
 1141:             IF ( L2PERT ) THEN
 1142: *              XSC = SQRT(SMALL)
 1143:                XSC = EPSLN / DBLE(N)
 1144:                DO 4947 q = 1, NR
 1145:                   CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO)
 1146:                   DO 4949 p = 1, N
 1147:                      IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) )
 1148:      $                    .OR. ( p .LT. q ) )
 1149: *     $                     A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
 1150:      $                     A(p,q) = CTEMP
 1151:  4949             CONTINUE
 1152:  4947          CONTINUE
 1153:             ELSE
 1154:                CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA )
 1155:             END IF
 1156: *
 1157: *            .. second preconditioning using the QR factorization
 1158: *
 1159:             CALL ZGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR )
 1160: *
 1161: *           .. and transpose upper to lower triangular
 1162:             DO 1948 p = 1, NR - 1
 1163:                CALL ZCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 )
 1164:                CALL ZLACGV( NR-p+1, A(p,p), 1 )
 1165:  1948       CONTINUE
 1166: *
 1167:       END IF
 1168: *
 1169: *           Row-cyclic Jacobi SVD algorithm with column pivoting
 1170: *
 1171: *           .. again some perturbation (a "background noise") is added
 1172: *           to drown denormals
 1173:             IF ( L2PERT ) THEN
 1174: *              XSC = SQRT(SMALL)
 1175:                XSC = EPSLN / DBLE(N)
 1176:                DO 1947 q = 1, NR
 1177:                   CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO)
 1178:                   DO 1949 p = 1, NR
 1179:                      IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) )
 1180:      $                       .OR. ( p .LT. q ) )
 1181: *     $                   A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
 1182:      $                   A(p,q) = CTEMP 
 1183:  1949             CONTINUE
 1184:  1947          CONTINUE
 1185:             ELSE
 1186:                CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA )
 1187:             END IF
 1188: *
 1189: *           .. and one-sided Jacobi rotations are started on a lower
 1190: *           triangular matrix (plus perturbation which is ignored in
 1191: *           the part which destroys triangular form (confusing?!))
 1192: *
 1193:             CALL ZGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA,
 1194:      $                N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO )
 1195: *
 1196:             SCALEM  = RWORK(1)
 1197:             NUMRANK = NINT(RWORK(2))
 1198: *
 1199: *
 1200:       ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN
 1201: *
 1202: *        -> Singular Values and Right Singular Vectors <-
 1203: *
 1204:          IF ( ALMORT ) THEN
 1205: *
 1206: *           .. in this case NR equals N
 1207:             DO 1998 p = 1, NR
 1208:                CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1209:                CALL ZLACGV( N-p+1, V(p,p), 1 )
 1210:  1998       CONTINUE
 1211:             CALL ZLASET( 'Upper', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV )
 1212: *
 1213:             CALL ZGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA,
 1214:      $                  CWORK, LWORK, RWORK, LRWORK, INFO )
 1215:             SCALEM  = RWORK(1)
 1216:             NUMRANK = NINT(RWORK(2))
 1217: 
 1218:          ELSE
 1219: *
 1220: *        .. two more QR factorizations ( one QRF is not enough, two require
 1221: *        accumulated product of Jacobi rotations, three are perfect )
 1222: *
 1223:             CALL ZLASET( 'Lower', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA )
 1224:             CALL ZGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR)
 1225:             CALL ZLACPY( 'Lower', NR, NR, A, LDA, V, LDV )
 1226:             CALL ZLASET( 'Upper', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV )
 1227:             CALL ZGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1228:      $                   LWORK-2*N, IERR )
 1229:             DO 8998 p = 1, NR
 1230:                CALL ZCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 )
 1231:                CALL ZLACGV( NR-p+1, V(p,p), 1 ) 
 1232:  8998       CONTINUE
 1233:             CALL ZLASET('Upper', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV)
 1234: *
 1235:             CALL ZGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U,
 1236:      $                  LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO )
 1237:             SCALEM  = RWORK(1)
 1238:             NUMRANK = NINT(RWORK(2))
 1239:             IF ( NR .LT. N ) THEN
 1240:                CALL ZLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1),  LDV )
 1241:                CALL ZLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1),  LDV )
 1242:                CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV )
 1243:             END IF
 1244: *
 1245:          CALL ZUNMLQ( 'Left', 'C', N, N, NR, A, LDA, CWORK,
 1246:      $               V, LDV, CWORK(N+1), LWORK-N, IERR )
 1247: *
 1248:          END IF
 1249: *
 1250:          DO 8991 p = 1, N
 1251:             CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
 1252:  8991    CONTINUE
 1253:          CALL ZLACPY( 'All', N, N, A, LDA, V, LDV )
 1254: *
 1255:          IF ( TRANSP ) THEN
 1256:             CALL ZLACPY( 'All', N, N, V, LDV, U, LDU )
 1257:          END IF
 1258: *
 1259:       ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN
 1260: *
 1261: *        .. Singular Values and Left Singular Vectors                 ..
 1262: *
 1263: *        .. second preconditioning step to avoid need to accumulate
 1264: *        Jacobi rotations in the Jacobi iterations.
 1265:          DO 1965 p = 1, NR
 1266:             CALL ZCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 )
 1267:             CALL ZLACGV( N-p+1, U(p,p), 1 )
 1268:  1965    CONTINUE
 1269:          CALL ZLASET( 'Upper', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1270: *
 1271:          CALL ZGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1),
 1272:      $              LWORK-2*N, IERR )
 1273: *
 1274:          DO 1967 p = 1, NR - 1
 1275:             CALL ZCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 )
 1276:             CALL ZLACGV( N-p+1, U(p,p), 1 )            
 1277:  1967    CONTINUE
 1278:          CALL ZLASET( 'Upper', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1279: *
 1280:          CALL ZGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A,
 1281:      $        LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO )
 1282:          SCALEM  = RWORK(1)
 1283:          NUMRANK = NINT(RWORK(2))
 1284: *
 1285:          IF ( NR .LT. M ) THEN
 1286:             CALL ZLASET( 'A',  M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU )
 1287:             IF ( NR .LT. N1 ) THEN
 1288:                CALL ZLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU )
 1289:                CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU )
 1290:             END IF
 1291:          END IF
 1292: *
 1293:          CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1294:      $               LDU, CWORK(N+1), LWORK-N, IERR )
 1295: *
 1296:          IF ( ROWPIV )
 1297:      $       CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1298: *
 1299:          DO 1974 p = 1, N1
 1300:             XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1301:             CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1302:  1974    CONTINUE
 1303: *
 1304:          IF ( TRANSP ) THEN
 1305:             CALL ZLACPY( 'All', N, N, U, LDU, V, LDV )
 1306:          END IF
 1307: *
 1308:       ELSE
 1309: *
 1310: *        .. Full SVD ..
 1311: *
 1312:          IF ( .NOT. JRACC ) THEN
 1313: *
 1314:          IF ( .NOT. ALMORT ) THEN
 1315: *
 1316: *           Second Preconditioning Step (QRF [with pivoting])
 1317: *           Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
 1318: *           equivalent to an LQF CALL. Since in many libraries the QRF
 1319: *           seems to be better optimized than the LQF, we do explicit
 1320: *           transpose and use the QRF. This is subject to changes in an
 1321: *           optimized implementation of ZGEJSV.
 1322: *
 1323:             DO 1968 p = 1, NR
 1324:                CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1325:                CALL ZLACGV( N-p+1, V(p,p), 1 )
 1326:  1968       CONTINUE
 1327: *
 1328: *           .. the following two loops perturb small entries to avoid
 1329: *           denormals in the second QR factorization, where they are
 1330: *           as good as zeros. This is done to avoid painfully slow
 1331: *           computation with denormals. The relative size of the perturbation
 1332: *           is a parameter that can be changed by the implementer.
 1333: *           This perturbation device will be obsolete on machines with
 1334: *           properly implemented arithmetic.
 1335: *           To switch it off, set L2PERT=.FALSE. To remove it from  the
 1336: *           code, remove the action under L2PERT=.TRUE., leave the ELSE part.
 1337: *           The following two loops should be blocked and fused with the
 1338: *           transposed copy above.
 1339: *
 1340:             IF ( L2PERT ) THEN
 1341:                XSC = DSQRT(SMALL)
 1342:                DO 2969 q = 1, NR
 1343:                   CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO)
 1344:                   DO 2968 p = 1, N
 1345:                      IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 )
 1346:      $                   .OR. ( p .LT. q ) )
 1347: *     $                   V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
 1348:      $                   V(p,q) = CTEMP      
 1349:                      IF ( p .LT. q ) V(p,q) = - V(p,q)
 1350:  2968             CONTINUE
 1351:  2969          CONTINUE
 1352:             ELSE
 1353:                CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV )
 1354:             END IF
 1355: *
 1356: *           Estimate the row scaled condition number of R1
 1357: *           (If R1 is rectangular, N > NR, then the condition number
 1358: *           of the leading NR x NR submatrix is estimated.)
 1359: *
 1360:             CALL ZLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR )
 1361:             DO 3950 p = 1, NR
 1362:                TEMP1 = DZNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1)
 1363:                CALL ZDSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1)
 1364:  3950       CONTINUE
 1365:             CALL ZPOCON('Lower',NR,CWORK(2*N+1),NR,ONE,TEMP1,
 1366:      $                   CWORK(2*N+NR*NR+1),RWORK,IERR)
 1367:             CONDR1 = ONE / DSQRT(TEMP1)
 1368: *           .. here need a second oppinion on the condition number
 1369: *           .. then assume worst case scenario
 1370: *           R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
 1371: *           more conservative    <=> CONDR1 .LT. SQRT(DBLE(N))
 1372: *
 1373:             COND_OK = DSQRT(DSQRT(DBLE(NR)))
 1374: *[TP]       COND_OK is a tuning parameter.
 1375: *
 1376:             IF ( CONDR1 .LT. COND_OK ) THEN
 1377: *              .. the second QRF without pivoting. Note: in an optimized
 1378: *              implementation, this QRF should be implemented as the QRF
 1379: *              of a lower triangular matrix.
 1380: *              R1^* = Q2 * R2
 1381:                CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1382:      $              LWORK-2*N, IERR )
 1383: *
 1384:                IF ( L2PERT ) THEN
 1385:                   XSC = DSQRT(SMALL)/EPSLN
 1386:                   DO 3959 p = 2, NR
 1387:                      DO 3958 q = 1, p - 1
 1388:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1389:      $                              ZERO)
 1390:                         IF ( ABS(V(q,p)) .LE. TEMP1 )
 1391: *     $                     V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
 1392:      $                     V(q,p) = CTEMP    
 1393:  3958                CONTINUE
 1394:  3959             CONTINUE
 1395:                END IF
 1396: *
 1397:                IF ( NR .NE. N )
 1398:      $         CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N )
 1399: *              .. save ...
 1400: *
 1401: *           .. this transposed copy should be better than naive
 1402:                DO 1969 p = 1, NR - 1
 1403:                   CALL ZCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 )
 1404:                   CALL ZLACGV(NR-p+1, V(p,p), 1 )
 1405:  1969          CONTINUE
 1406:                V(NR,NR)=DCONJG(V(NR,NR))   
 1407: *
 1408:                CONDR2 = CONDR1
 1409: *
 1410:             ELSE
 1411: *
 1412: *              .. ill-conditioned case: second QRF with pivoting
 1413: *              Note that windowed pivoting would be equaly good
 1414: *              numerically, and more run-time efficient. So, in
 1415: *              an optimal implementation, the next call to ZGEQP3
 1416: *              should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
 1417: *              with properly (carefully) chosen parameters.
 1418: *
 1419: *              R1^* * P2 = Q2 * R2
 1420:                DO 3003 p = 1, NR
 1421:                   IWORK(N+p) = 0
 1422:  3003          CONTINUE
 1423:                CALL ZGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1),
 1424:      $                  CWORK(2*N+1), LWORK-2*N, RWORK, IERR )
 1425: **               CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1426: **     $              LWORK-2*N, IERR )
 1427:                IF ( L2PERT ) THEN
 1428:                   XSC = DSQRT(SMALL)
 1429:                   DO 3969 p = 2, NR
 1430:                      DO 3968 q = 1, p - 1
 1431:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1432:      $                                ZERO)
 1433:                         IF ( ABS(V(q,p)) .LE. TEMP1 )
 1434: *     $                     V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
 1435:      $                     V(q,p) = CTEMP                     
 1436:  3968                CONTINUE
 1437:  3969             CONTINUE
 1438:                END IF
 1439: *
 1440:                CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N )
 1441: *
 1442:                IF ( L2PERT ) THEN
 1443:                   XSC = DSQRT(SMALL)
 1444:                   DO 8970 p = 2, NR
 1445:                      DO 8971 q = 1, p - 1
 1446:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1447:      $                               ZERO)
 1448: *                        V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
 1449:                         V(p,q) = - CTEMP      
 1450:  8971                CONTINUE
 1451:  8970             CONTINUE
 1452:                ELSE
 1453:                   CALL ZLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV )
 1454:                END IF
 1455: *              Now, compute R2 = L3 * Q3, the LQ factorization.
 1456:                CALL ZGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1),
 1457:      $               CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
 1458: *              .. and estimate the condition number
 1459:                CALL ZLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR )
 1460:                DO 4950 p = 1, NR
 1461:                   TEMP1 = DZNRM2( p, CWORK(2*N+N*NR+NR+p), NR )
 1462:                   CALL ZDSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR )
 1463:  4950          CONTINUE
 1464:                CALL ZPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1,
 1465:      $              CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) 
 1466:                CONDR2 = ONE / DSQRT(TEMP1)
 1467: *
 1468: *
 1469:                IF ( CONDR2 .GE. COND_OK ) THEN
 1470: *                 .. save the Householder vectors used for Q3
 1471: *                 (this overwrittes the copy of R2, as it will not be
 1472: *                 needed in this branch, but it does not overwritte the
 1473: *                 Huseholder vectors of Q2.).
 1474:                   CALL ZLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N )
 1475: *                 .. and the rest of the information on Q3 is in
 1476: *                 WORK(2*N+N*NR+1:2*N+N*NR+N)
 1477:                END IF
 1478: *
 1479:             END IF
 1480: *
 1481:             IF ( L2PERT ) THEN
 1482:                XSC = DSQRT(SMALL)
 1483:                DO 4968 q = 2, NR
 1484:                   CTEMP = XSC * V(q,q)
 1485:                   DO 4969 p = 1, q - 1
 1486: *                     V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
 1487:                      V(p,q) = - CTEMP
 1488:  4969             CONTINUE
 1489:  4968          CONTINUE
 1490:             ELSE
 1491:                CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV )
 1492:             END IF
 1493: *
 1494: *        Second preconditioning finished; continue with Jacobi SVD
 1495: *        The input matrix is lower trinagular.
 1496: *
 1497: *        Recover the right singular vectors as solution of a well
 1498: *        conditioned triangular matrix equation.
 1499: *
 1500:             IF ( CONDR1 .LT. COND_OK ) THEN
 1501: *
 1502:                CALL ZGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU,
 1503:      $              CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK,
 1504:      $              LRWORK, INFO )
 1505:                SCALEM  = RWORK(1)
 1506:                NUMRANK = NINT(RWORK(2))
 1507:                DO 3970 p = 1, NR
 1508:                   CALL ZCOPY(  NR, V(1,p), 1, U(1,p), 1 )
 1509:                   CALL ZDSCAL( NR, SVA(p),    V(1,p), 1 )
 1510:  3970          CONTINUE
 1511: 
 1512: *        .. pick the right matrix equation and solve it
 1513: *
 1514:                IF ( NR .EQ. N ) THEN
 1515: * :))             .. best case, R1 is inverted. The solution of this matrix
 1516: *                 equation is Q2*V2 = the product of the Jacobi rotations
 1517: *                 used in ZGESVJ, premultiplied with the orthogonal matrix
 1518: *                 from the second QR factorization.
 1519:                   CALL ZTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV)
 1520:                ELSE
 1521: *                 .. R1 is well conditioned, but non-square. Adjoint of R2
 1522: *                 is inverted to get the product of the Jacobi rotations
 1523: *                 used in ZGESVJ. The Q-factor from the second QR
 1524: *                 factorization is then built in explicitly.
 1525:                   CALL ZTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1),
 1526:      $                 N,V,LDV)
 1527:                   IF ( NR .LT. N ) THEN
 1528:                   CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV)
 1529:                   CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV)
 1530:                   CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV)
 1531:                   END IF
 1532:                   CALL ZUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1533:      $                V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
 1534:                END IF
 1535: *
 1536:             ELSE IF ( CONDR2 .LT. COND_OK ) THEN
 1537: *
 1538: *              The matrix R2 is inverted. The solution of the matrix equation
 1539: *              is Q3^* * V3 = the product of the Jacobi rotations (appplied to
 1540: *              the lower triangular L3 from the LQ factorization of
 1541: *              R2=L3*Q3), pre-multiplied with the transposed Q3.
 1542:                CALL ZGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U,
 1543:      $              LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, 
 1544:      $          RWORK, LRWORK, INFO )
 1545:                SCALEM  = RWORK(1)
 1546:                NUMRANK = NINT(RWORK(2))
 1547:                DO 3870 p = 1, NR
 1548:                   CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 )
 1549:                   CALL ZDSCAL( NR, SVA(p),    U(1,p), 1 )
 1550:  3870          CONTINUE
 1551:                CALL ZTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N,
 1552:      $                    U,LDU)
 1553: *              .. apply the permutation from the second QR factorization
 1554:                DO 873 q = 1, NR
 1555:                   DO 872 p = 1, NR
 1556:                      CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
 1557:  872              CONTINUE
 1558:                   DO 874 p = 1, NR
 1559:                      U(p,q) = CWORK(2*N+N*NR+NR+p)
 1560:  874              CONTINUE
 1561:  873           CONTINUE
 1562:                IF ( NR .LT. N ) THEN
 1563:                   CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV )
 1564:                   CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV )
 1565:                   CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV)
 1566:                END IF
 1567:                CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1568:      $              V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1569:             ELSE
 1570: *              Last line of defense.
 1571: * #:(          This is a rather pathological case: no scaled condition
 1572: *              improvement after two pivoted QR factorizations. Other
 1573: *              possibility is that the rank revealing QR factorization
 1574: *              or the condition estimator has failed, or the COND_OK
 1575: *              is set very close to ONE (which is unnecessary). Normally,
 1576: *              this branch should never be executed, but in rare cases of
 1577: *              failure of the RRQR or condition estimator, the last line of
 1578: *              defense ensures that ZGEJSV completes the task.
 1579: *              Compute the full SVD of L3 using ZGESVJ with explicit
 1580: *              accumulation of Jacobi rotations.
 1581:                CALL ZGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U,
 1582:      $              LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, 
 1583:      $                         RWORK, LRWORK, INFO )
 1584:                SCALEM  = RWORK(1)
 1585:                NUMRANK = NINT(RWORK(2))
 1586:                IF ( NR .LT. N ) THEN
 1587:                   CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV )
 1588:                   CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV )
 1589:                   CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV)
 1590:                END IF
 1591:                CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1592:      $              V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1593: *
 1594:                CALL ZUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N,
 1595:      $              CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1),
 1596:      $              LWORK-2*N-N*NR-NR, IERR )
 1597:                DO 773 q = 1, NR
 1598:                   DO 772 p = 1, NR
 1599:                      CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
 1600:  772              CONTINUE
 1601:                   DO 774 p = 1, NR
 1602:                      U(p,q) = CWORK(2*N+N*NR+NR+p)
 1603:  774              CONTINUE
 1604:  773           CONTINUE
 1605: *
 1606:             END IF
 1607: *
 1608: *           Permute the rows of V using the (column) permutation from the
 1609: *           first QRF. Also, scale the columns to make them unit in
 1610: *           Euclidean norm. This applies to all cases.
 1611: *
 1612:             TEMP1 = DSQRT(DBLE(N)) * EPSLN
 1613:             DO 1972 q = 1, N
 1614:                DO 972 p = 1, N
 1615:                   CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
 1616:   972          CONTINUE
 1617:                DO 973 p = 1, N
 1618:                   V(p,q) = CWORK(2*N+N*NR+NR+p)
 1619:   973          CONTINUE
 1620:                XSC = ONE / DZNRM2( N, V(1,q), 1 )
 1621:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1622:      $           CALL ZDSCAL( N, XSC, V(1,q), 1 )
 1623:  1972       CONTINUE
 1624: *           At this moment, V contains the right singular vectors of A.
 1625: *           Next, assemble the left singular vector matrix U (M x N).
 1626:             IF ( NR .LT. M ) THEN
 1627:                CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU)
 1628:                IF ( NR .LT. N1 ) THEN
 1629:                   CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU)
 1630:                   CALL ZLASET('A',M-NR,N1-NR,CZERO,CONE,
 1631:      $                        U(NR+1,NR+1),LDU)
 1632:                END IF
 1633:             END IF
 1634: *
 1635: *           The Q matrix from the first QRF is built into the left singular
 1636: *           matrix U. This applies to all cases.
 1637: *
 1638:             CALL ZUNMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, CWORK, U,
 1639:      $           LDU, CWORK(N+1), LWORK-N, IERR )
 1640: 
 1641: *           The columns of U are normalized. The cost is O(M*N) flops.
 1642:             TEMP1 = DSQRT(DBLE(M)) * EPSLN
 1643:             DO 1973 p = 1, NR
 1644:                XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1645:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1646:      $          CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1647:  1973       CONTINUE
 1648: *
 1649: *           If the initial QRF is computed with row pivoting, the left
 1650: *           singular vectors must be adjusted.
 1651: *
 1652:             IF ( ROWPIV )
 1653:      $          CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1654: *
 1655:          ELSE
 1656: *
 1657: *        .. the initial matrix A has almost orthogonal columns and
 1658: *        the second QRF is not needed
 1659: *
 1660:             CALL ZLACPY( 'Upper', N, N, A, LDA, CWORK(N+1), N )
 1661:             IF ( L2PERT ) THEN
 1662:                XSC = DSQRT(SMALL)
 1663:                DO 5970 p = 2, N
 1664:                   CTEMP = XSC * CWORK( N + (p-1)*N + p )
 1665:                   DO 5971 q = 1, p - 1
 1666: *                     CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
 1667: *     $                                        ABS(CWORK(N+(p-1)*N+q)) )
 1668:                      CWORK(N+(q-1)*N+p)=-CTEMP           
 1669:  5971             CONTINUE
 1670:  5970          CONTINUE
 1671:             ELSE
 1672:                CALL ZLASET( 'Lower',N-1,N-1,CZERO,CZERO,CWORK(N+2),N )
 1673:             END IF
 1674: *
 1675:             CALL ZGESVJ( 'Upper', 'U', 'N', N, N, CWORK(N+1), N, SVA,
 1676:      $           N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, 
 1677:      $       INFO )
 1678: *
 1679:             SCALEM  = RWORK(1)
 1680:             NUMRANK = NINT(RWORK(2))
 1681:             DO 6970 p = 1, N
 1682:                CALL ZCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 )
 1683:                CALL ZDSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 )
 1684:  6970       CONTINUE
 1685: *
 1686:             CALL ZTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N,
 1687:      $           CONE, A, LDA, CWORK(N+1), N )
 1688:             DO 6972 p = 1, N
 1689:                CALL ZCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV )
 1690:  6972       CONTINUE
 1691:             TEMP1 = DSQRT(DBLE(N))*EPSLN
 1692:             DO 6971 p = 1, N
 1693:                XSC = ONE / DZNRM2( N, V(1,p), 1 )
 1694:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1695:      $            CALL ZDSCAL( N, XSC, V(1,p), 1 )
 1696:  6971       CONTINUE
 1697: *
 1698: *           Assemble the left singular vector matrix U (M x N).
 1699: *
 1700:             IF ( N .LT. M ) THEN
 1701:                CALL ZLASET( 'A',  M-N, N, CZERO, CZERO, U(N+1,1), LDU )
 1702:                IF ( N .LT. N1 ) THEN
 1703:                   CALL ZLASET('A',N,  N1-N, CZERO, CZERO,  U(1,N+1),LDU)
 1704:                   CALL ZLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU)
 1705:                END IF
 1706:             END IF
 1707:             CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1708:      $           LDU, CWORK(N+1), LWORK-N, IERR )
 1709:             TEMP1 = DSQRT(DBLE(M))*EPSLN
 1710:             DO 6973 p = 1, N1
 1711:                XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1712:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1713:      $            CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1714:  6973       CONTINUE
 1715: *
 1716:             IF ( ROWPIV )
 1717:      $         CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1718: *
 1719:          END IF
 1720: *
 1721: *        end of the  >> almost orthogonal case <<  in the full SVD
 1722: *
 1723:          ELSE
 1724: *
 1725: *        This branch deploys a preconditioned Jacobi SVD with explicitly
 1726: *        accumulated rotations. It is included as optional, mainly for
 1727: *        experimental purposes. It does perfom well, and can also be used.
 1728: *        In this implementation, this branch will be automatically activated
 1729: *        if the  condition number sigma_max(A) / sigma_min(A) is predicted
 1730: *        to be greater than the overflow threshold. This is because the
 1731: *        a posteriori computation of the singular vectors assumes robust
 1732: *        implementation of BLAS and some LAPACK procedures, capable of working
 1733: *        in presence of extreme values. Since that is not always the case, ...
 1734: *
 1735:          DO 7968 p = 1, NR
 1736:             CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1737:             CALL ZLACGV( N-p+1, V(p,p), 1 )
 1738:  7968    CONTINUE
 1739: *
 1740:          IF ( L2PERT ) THEN
 1741:             XSC = DSQRT(SMALL/EPSLN)
 1742:             DO 5969 q = 1, NR
 1743:                CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO)
 1744:                DO 5968 p = 1, N
 1745:                   IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 )
 1746:      $                .OR. ( p .LT. q ) )
 1747: *     $                V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
 1748:      $                V(p,q) = CTEMP        
 1749:                   IF ( p .LT. q ) V(p,q) = - V(p,q)
 1750:  5968          CONTINUE
 1751:  5969       CONTINUE
 1752:          ELSE
 1753:             CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV )
 1754:          END IF
 1755: 
 1756:          CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1757:      $        LWORK-2*N, IERR )
 1758:          CALL ZLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N )
 1759: *
 1760:          DO 7969 p = 1, NR
 1761:             CALL ZCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 )
 1762:             CALL ZLACGV( NR-p+1, U(p,p), 1 )
 1763:  7969    CONTINUE
 1764: 
 1765:          IF ( L2PERT ) THEN
 1766:             XSC = DSQRT(SMALL/EPSLN)
 1767:             DO 9970 q = 2, NR
 1768:                DO 9971 p = 1, q - 1
 1769:                   CTEMP = DCMPLX(XSC * DMIN1(ABS(U(p,p)),ABS(U(q,q))),
 1770:      $                            ZERO)
 1771: *                  U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
 1772:                   U(p,q) = - CTEMP     
 1773:  9971          CONTINUE
 1774:  9970       CONTINUE
 1775:          ELSE
 1776:             CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1777:          END IF
 1778: 
 1779:          CALL ZGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA,
 1780:      $        N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, 
 1781:      $         RWORK, LRWORK, INFO )
 1782:          SCALEM  = RWORK(1)
 1783:          NUMRANK = NINT(RWORK(2))
 1784: 
 1785:          IF ( NR .LT. N ) THEN
 1786:             CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV )
 1787:             CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV )
 1788:             CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV )
 1789:          END IF
 1790: 
 1791:          CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1792:      $        V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1793: *
 1794: *           Permute the rows of V using the (column) permutation from the
 1795: *           first QRF. Also, scale the columns to make them unit in
 1796: *           Euclidean norm. This applies to all cases.
 1797: *
 1798:             TEMP1 = DSQRT(DBLE(N)) * EPSLN
 1799:             DO 7972 q = 1, N
 1800:                DO 8972 p = 1, N
 1801:                   CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
 1802:  8972          CONTINUE
 1803:                DO 8973 p = 1, N
 1804:                   V(p,q) = CWORK(2*N+N*NR+NR+p)
 1805:  8973          CONTINUE
 1806:                XSC = ONE / DZNRM2( N, V(1,q), 1 )
 1807:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1808:      $           CALL ZDSCAL( N, XSC, V(1,q), 1 )
 1809:  7972       CONTINUE
 1810: *
 1811: *           At this moment, V contains the right singular vectors of A.
 1812: *           Next, assemble the left singular vector matrix U (M x N).
 1813: *
 1814:          IF ( NR .LT. M ) THEN
 1815:             CALL ZLASET( 'A',  M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU )
 1816:             IF ( NR .LT. N1 ) THEN
 1817:                CALL ZLASET('A',NR,  N1-NR, CZERO, CZERO,  U(1,NR+1),LDU)
 1818:                CALL ZLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU)
 1819:             END IF
 1820:          END IF
 1821: *
 1822:          CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1823:      $        LDU, CWORK(N+1), LWORK-N, IERR )
 1824: *
 1825:             IF ( ROWPIV )
 1826:      $         CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1827: *
 1828: *
 1829:          END IF
 1830:          IF ( TRANSP ) THEN
 1831: *           .. swap U and V because the procedure worked on A^*
 1832:             DO 6974 p = 1, N
 1833:                CALL ZSWAP( N, U(1,p), 1, V(1,p), 1 )
 1834:  6974       CONTINUE
 1835:          END IF
 1836: *
 1837:       END IF
 1838: *     end of the full SVD
 1839: *
 1840: *     Undo scaling, if necessary (and possible)
 1841: *
 1842:       IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN
 1843:          CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR )
 1844:          USCAL1 = ONE
 1845:          USCAL2 = ONE
 1846:       END IF
 1847: *
 1848:       IF ( NR .LT. N ) THEN
 1849:          DO 3004 p = NR+1, N
 1850:             SVA(p) = ZERO
 1851:  3004    CONTINUE
 1852:       END IF
 1853: *
 1854:       RWORK(1) = USCAL2 * SCALEM
 1855:       RWORK(2) = USCAL1
 1856:       IF ( ERREST ) RWORK(3) = SCONDA
 1857:       IF ( LSVEC .AND. RSVEC ) THEN
 1858:          RWORK(4) = CONDR1
 1859:          RWORK(5) = CONDR2
 1860:       END IF
 1861:       IF ( L2TRAN ) THEN
 1862:          RWORK(6) = ENTRA
 1863:          RWORK(7) = ENTRAT
 1864:       END IF
 1865: *
 1866:       IWORK(1) = NR
 1867:       IWORK(2) = NUMRANK
 1868:       IWORK(3) = WARNING
 1869: *
 1870:       RETURN
 1871: *     ..
 1872: *     .. END OF ZGEJSV
 1873: *     ..
 1874:       END
 1875: *

CVSweb interface <joel.bertrand@systella.fr>