rpl/lapack/lapack/zgejsv.f - view

Return to zgejsv.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / zgejsv.f
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Thu Nov 26 11:44:21 2015 UTC (8 years, 6 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_24, HEAD

Mise à jour de Lapack (3.6.0) et du numéro de version du RPL/2.

    1: *> \brief \b ZGEJSV
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at 
    6: *            http://www.netlib.org/lapack/explore-html/ 
    7: *
    8: *> \htmlonly
    9: *> Download ZGEJSV + dependencies 
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgejsv.f"> 
   11: *> [TGZ]</a> 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgejsv.f"> 
   13: *> [ZIP]</a> 
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgejsv.f"> 
   15: *> [TXT]</a>
   16: *> \endhtmlonly 
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *     SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
   22: *                         M, N, A, LDA, SVA, U, LDU, V, LDV,
   23: *                         CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
   24: * 
   25: *     .. Scalar Arguments ..
   26: *     IMPLICIT    NONE
   27: *     INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N
   28: *     ..
   29: *     .. Array Arguments ..
   30: *     DOUBLE COMPLEX     A( LDA, * ),  U( LDU, * ), V( LDV, * ), CWORK( LWORK )
   31: *     DOUBLE PRECISION   SVA( N ), RWORK( LRWORK )      
   32: *     INTEGER     IWORK( * )
   33: *     CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
   34: *       ..
   35: *  
   36: *
   37: *> \par Purpose:
   38: *  =============
   39: *>
   40: *> \verbatim
   41: *>
   42: * ZGEJSV computes the singular value decomposition (SVD) of a real M-by-N
   43: * matrix [A], where M >= N. The SVD of [A] is written as
   44: *
   45: *              [A] = [U] * [SIGMA] * [V]^*,
   46: *
   47: * where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
   48: * diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
   49: * [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
   50: * the singular values of [A]. The columns of [U] and [V] are the left and
   51: * the right singular vectors of [A], respectively. The matrices [U] and [V]
   52: * are computed and stored in the arrays U and V, respectively. The diagonal
   53: * of [SIGMA] is computed and stored in the array SVA.
   54: *
   55: *  Arguments:
   56: *  ==========
   57: *>
   58: *> \param[in] JOBA
   59: *> \verbatim
   60: *>          JOBA is CHARACTER*1
   61: *>         Specifies the level of accuracy:
   62: *>       = 'C': This option works well (high relative accuracy) if A = B * D,
   63: *>              with well-conditioned B and arbitrary diagonal matrix D.
   64: *>              The accuracy cannot be spoiled by COLUMN scaling. The
   65: *>              accuracy of the computed output depends on the condition of
   66: *>              B, and the procedure aims at the best theoretical accuracy.
   67: *>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
   68: *>              bounded by f(M,N)*epsilon* cond(B), independent of D.
   69: *>              The input matrix is preprocessed with the QRF with column
   70: *>              pivoting. This initial preprocessing and preconditioning by
   71: *>              a rank revealing QR factorization is common for all values of
   72: *>              JOBA. Additional actions are specified as follows:
   73: *>       = 'E': Computation as with 'C' with an additional estimate of the
   74: *>              condition number of B. It provides a realistic error bound.
   75: *>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
   76: *>              D1, D2, and well-conditioned matrix C, this option gives
   77: *>              higher accuracy than the 'C' option. If the structure of the
   78: *>              input matrix is not known, and relative accuracy is
   79: *>              desirable, then this option is advisable. The input matrix A
   80: *>              is preprocessed with QR factorization with FULL (row and
   81: *>              column) pivoting.
   82: *>       = 'G'  Computation as with 'F' with an additional estimate of the
   83: *>              condition number of B, where A=D*B. If A has heavily weighted
   84: *>              rows, then using this condition number gives too pessimistic
   85: *>              error bound.
   86: *>       = 'A': Small singular values are the noise and the matrix is treated
   87: *>              as numerically rank defficient. The error in the computed
   88: *>              singular values is bounded by f(m,n)*epsilon*||A||.
   89: *>              The computed SVD A = U * S * V^* restores A up to
   90: *>              f(m,n)*epsilon*||A||.
   91: *>              This gives the procedure the licence to discard (set to zero)
   92: *>              all singular values below N*epsilon*||A||.
   93: *>       = 'R': Similar as in 'A'. Rank revealing property of the initial
   94: *>              QR factorization is used do reveal (using triangular factor)
   95: *>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
   96: *>              numerical RANK is declared to be r. The SVD is computed with
   97: *>              absolute error bounds, but more accurately than with 'A'.
   98: *> \endverbatim
   99: *> 
  100: *> \param[in] JOBU
  101: *> \verbatim
  102: *>          JOBU is CHARACTER*1
  103: *>         Specifies whether to compute the columns of U:
  104: *>       = 'U': N columns of U are returned in the array U.
  105: *>       = 'F': full set of M left sing. vectors is returned in the array U.
  106: *>       = 'W': U may be used as workspace of length M*N. See the description
  107: *>              of U.
  108: *>       = 'N': U is not computed.
  109: *> \endverbatim
  110: *> 
  111: *> \param[in] JOBV
  112: *> \verbatim
  113: *>          JOBV is CHARACTER*1
  114: *>         Specifies whether to compute the matrix V:
  115: *>       = 'V': N columns of V are returned in the array V; Jacobi rotations
  116: *>              are not explicitly accumulated.
  117: *>       = 'J': N columns of V are returned in the array V, but they are
  118: *>              computed as the product of Jacobi rotations. This option is
  119: *>              allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
  120: *>       = 'W': V may be used as workspace of length N*N. See the description
  121: *>              of V.
  122: *>       = 'N': V is not computed.
  123: *> \endverbatim
  124: *> 
  125: *> \param[in] JOBR
  126: *> \verbatim
  127: *>          JOBR is CHARACTER*1
  128: *>         Specifies the RANGE for the singular values. Issues the licence to
  129: *>         set to zero small positive singular values if they are outside
  130: *>         specified range. If A .NE. 0 is scaled so that the largest singular
  131: *>         value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
  132: *>         the licence to kill columns of A whose norm in c*A is less than
  133: *>         SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
  134: *>         where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
  135: *>       = 'N': Do not kill small columns of c*A. This option assumes that
  136: *>              BLAS and QR factorizations and triangular solvers are
  137: *>              implemented to work in that range. If the condition of A
  138: *>              is greater than BIG, use ZGESVJ.
  139: *>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
  140: *>              (roughly, as described above). This option is recommended.
  141: *>                                             ===========================
  142: *>         For computing the singular values in the FULL range [SFMIN,BIG]
  143: *>         use ZGESVJ.
  144: *> \endverbatim
  145: *> 
  146: *> \param[in] JOBT
  147: *> \verbatim
  148: *>          JOBT is CHARACTER*1
  149: *>         If the matrix is square then the procedure may determine to use
  150: *>         transposed A if A^* seems to be better with respect to convergence.
  151: *>         If the matrix is not square, JOBT is ignored. This is subject to
  152: *>         changes in the future.
  153: *>         The decision is based on two values of entropy over the adjoint
  154: *>         orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
  155: *>       = 'T': transpose if entropy test indicates possibly faster
  156: *>         convergence of Jacobi process if A^* is taken as input. If A is
  157: *>         replaced with A^*, then the row pivoting is included automatically.
  158: *>       = 'N': do not speculate.
  159: *>         This option can be used to compute only the singular values, or the
  160: *>         full SVD (U, SIGMA and V). For only one set of singular vectors
  161: *>         (U or V), the caller should provide both U and V, as one of the
  162: *>         matrices is used as workspace if the matrix A is transposed.
  163: *>         The implementer can easily remove this constraint and make the
  164: *>         code more complicated. See the descriptions of U and V.
  165: *> \endverbatim
  166: *> 
  167: *> \param[in] JOBP
  168: *> \verbatim
  169: *>          JOBP is CHARACTER*1
  170: *>         Issues the licence to introduce structured perturbations to drown
  171: *>         denormalized numbers. This licence should be active if the
  172: *>         denormals are poorly implemented, causing slow computation,
  173: *>         especially in cases of fast convergence (!). For details see [1,2].
  174: *>         For the sake of simplicity, this perturbations are included only
  175: *>         when the full SVD or only the singular values are requested. The
  176: *>         implementer/user can easily add the perturbation for the cases of
  177: *>         computing one set of singular vectors.
  178: *>       = 'P': introduce perturbation
  179: *>       = 'N': do not perturb
  180: *> \endverbatim
  181: *>
  182: *> \param[in] M
  183: *> \verbatim
  184: *>          M is INTEGER
  185: *>         The number of rows of the input matrix A.  M >= 0.
  186: *> \endverbatim
  187: *>
  188: *> \param[in] N
  189: *> \verbatim
  190: *>          N is INTEGER
  191: *>         The number of columns of the input matrix A. M >= N >= 0.
  192: *> \endverbatim
  193: *>
  194: *> \param[in,out] A
  195: *> \verbatim
  196: *>          A is DOUBLE COMPLEX array, dimension (LDA,N)
  197: *>          On entry, the M-by-N matrix A.
  198: *> \endverbatim
  199: *>
  200: *> \param[in] LDA
  201: *> \verbatim
  202: *>          LDA is INTEGER
  203: *>          The leading dimension of the array A.  LDA >= max(1,M).
  204: *> \endverbatim
  205: *>
  206: *> \param[out] SVA
  207: *> \verbatim
  208: *>          SVA is DOUBLE PRECISION array, dimension (N)
  209: *>          On exit,
  210: *>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
  211: *>            computation SVA contains Euclidean column norms of the
  212: *>            iterated matrices in the array A.
  213: *>          - For WORK(1) .NE. WORK(2): The singular values of A are
  214: *>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
  215: *>            sigma_max(A) overflows or if small singular values have been
  216: *>            saved from underflow by scaling the input matrix A.
  217: *>          - If JOBR='R' then some of the singular values may be returned
  218: *>            as exact zeros obtained by "set to zero" because they are
  219: *>            below the numerical rank threshold or are denormalized numbers.
  220: *> \endverbatim
  221: *>
  222: *> \param[out] U
  223: *> \verbatim
  224: *>          U is DOUBLE COMPLEX array, dimension ( LDU, N )
  225: *>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
  226: *>                         the left singular vectors.
  227: *>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
  228: *>                         the left singular vectors, including an ONB
  229: *>                         of the orthogonal complement of the Range(A).
  230: *>          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
  231: *>                         then U is used as workspace if the procedure
  232: *>                         replaces A with A^*. In that case, [V] is computed
  233: *>                         in U as left singular vectors of A^* and then
  234: *>                         copied back to the V array. This 'W' option is just
  235: *>                         a reminder to the caller that in this case U is
  236: *>                         reserved as workspace of length N*N.
  237: *>          If JOBU = 'N'  U is not referenced.
  238: *> \endverbatim
  239: *>
  240: *> \param[in] LDU
  241: *> \verbatim
  242: *>          LDU is INTEGER
  243: *>          The leading dimension of the array U,  LDU >= 1.
  244: *>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
  245: *> \endverbatim
  246: *>
  247: *> \param[out] V
  248: *> \verbatim
  249: *>          V is DOUBLE COMPLEX array, dimension ( LDV, N )
  250: *>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
  251: *>                         the right singular vectors;
  252: *>          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
  253: *>                         then V is used as workspace if the pprocedure
  254: *>                         replaces A with A^*. In that case, [U] is computed
  255: *>                         in V as right singular vectors of A^* and then
  256: *>                         copied back to the U array. This 'W' option is just
  257: *>                         a reminder to the caller that in this case V is
  258: *>                         reserved as workspace of length N*N.
  259: *>          If JOBV = 'N'  V is not referenced.
  260: *> \endverbatim
  261: *>
  262: *> \param[in] LDV
  263: *> \verbatim
  264: *>          LDV is INTEGER
  265: *>          The leading dimension of the array V,  LDV >= 1.
  266: *>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
  267: *> \endverbatim
  268: *>
  269: *> \param[out] CWORK
  270: *> \verbatim
  271: *> CWORK (workspace)
  272: *>          CWORK is DOUBLE COMPLEX array, dimension at least LWORK.     
  273: *> \endverbatim
  274: *>
  275: *> \param[in] LWORK
  276: *> \verbatim
  277: *>          LWORK is INTEGER
  278: *>          Length of CWORK to confirm proper allocation of workspace.
  279: *>          LWORK depends on the job:
  280: *>
  281: *>          1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
  282: *>            1.1 .. no scaled condition estimate required (JOBE.EQ.'N'):
  283: *>               LWORK >= 2*N+1. This is the minimal requirement.
  284: *>               ->> For optimal performance (blocked code) the optimal value
  285: *>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
  286: *>               block size for ZGEQP3 and ZGEQRF.
  287: *>               In general, optimal LWORK is computed as 
  288: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF)).        
  289: *>            1.2. .. an estimate of the scaled condition number of A is
  290: *>               required (JOBA='E', or 'G'). In this case, LWORK the minimal
  291: *>               requirement is LWORK >= N*N + 3*N.
  292: *>               ->> For optimal performance (blocked code) the optimal value 
  293: *>               is LWORK >= max(N+(N+1)*NB, N*N+3*N).
  294: *>               In general, the optimal length LWORK is computed as
  295: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), 
  296: *>                                                     N+N*N+LWORK(CPOCON)).
  297: *>
  298: *>          2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
  299: *>             (JOBU.EQ.'N')
  300: *>            -> the minimal requirement is LWORK >= 3*N.
  301: *>            -> For optimal performance, LWORK >= max(N+(N+1)*NB, 3*N,2*N+N*NB),
  302: *>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ,
  303: *>               CUNMLQ. In general, the optimal length LWORK is computed as
  304: *>               LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(CPOCON), N+LWORK(ZGESVJ),
  305: *>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(CUNMLQ)).
  306: *>
  307: *>          3. If SIGMA and the left singular vectors are needed
  308: *>            -> the minimal requirement is LWORK >= 3*N.
  309: *>            -> For optimal performance:
  310: *>               if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB),
  311: *>               where NB is the optimal block size for ZGEQP3, ZGEQRF, CUNMQR.
  312: *>               In general, the optimal length LWORK is computed as
  313: *>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(CPOCON),
  314: *>                        2*N+LWORK(ZGEQRF), N+LWORK(CUNMQR)). 
  315: *>               
  316: *>          4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and 
  317: *>            4.1. if JOBV.EQ.'V'  
  318: *>               the minimal requirement is LWORK >= 5*N+2*N*N. 
  319: *>            4.2. if JOBV.EQ.'J' the minimal requirement is 
  320: *>               LWORK >= 4*N+N*N.
  321: *>            In both cases, the allocated CWORK can accomodate blocked runs
  322: *>            of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, CUNMLQ.
  323: *> \endverbatim
  324: *>
  325: *> \param[out] RWORK
  326: *> \verbatim
  327: *>          RWORK is DOUBLE PRECISION array, dimension at least LRWORK.
  328: *>          On exit,
  329: *>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
  330: *>                    such that SCALE*SVA(1:N) are the computed singular values
  331: *>                    of A. (See the description of SVA().)
  332: *>          RWORK(2) = See the description of RWORK(1).
  333: *>          RWORK(3) = SCONDA is an estimate for the condition number of
  334: *>                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
  335: *>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
  336: *>                    It is computed using SPOCON. It holds
  337: *>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
  338: *>                    where R is the triangular factor from the QRF of A.
  339: *>                    However, if R is truncated and the numerical rank is
  340: *>                    determined to be strictly smaller than N, SCONDA is
  341: *>                    returned as -1, thus indicating that the smallest
  342: *>                    singular values might be lost.
  343: *>
  344: *>          If full SVD is needed, the following two condition numbers are
  345: *>          useful for the analysis of the algorithm. They are provied for
  346: *>          a developer/implementer who is familiar with the details of
  347: *>          the method.
  348: *>
  349: *>          RWORK(4) = an estimate of the scaled condition number of the
  350: *>                    triangular factor in the first QR factorization.
  351: *>          RWORK(5) = an estimate of the scaled condition number of the
  352: *>                    triangular factor in the second QR factorization.
  353: *>          The following two parameters are computed if JOBT .EQ. 'T'.
  354: *>          They are provided for a developer/implementer who is familiar
  355: *>          with the details of the method.
  356: *>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
  357: *>                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
  358: *>                    probability simplex.
  359: *>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
  360: *> \endverbatim
  361: *>
  362: *> \param[in] LRWORK
  363: *> \verbatim
  364: *>          LRWORK is INTEGER
  365: *>          Length of RWORK to confirm proper allocation of workspace.
  366: *>          LRWORK depends on the job:
  367: *>
  368: *>       1. If only singular values are requested i.e. if 
  369: *>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') 
  370: *>          then:
  371: *>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  372: *>          then LRWORK = max( 7, N + 2 * M ). 
  373: *>          1.2. Otherwise, LRWORK  = max( 7, 2 * N ).
  374: *>       2. If singular values with the right singular vectors are requested
  375: *>          i.e. if 
  376: *>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. 
  377: *>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
  378: *>          then:
  379: *>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  380: *>          then LRWORK = max( 7, N + 2 * M ). 
  381: *>          2.2. Otherwise, LRWORK  = max( 7, 2 * N ).      
  382: *>       3. If singular values with the left singular vectors are requested, i.e. if    
  383: *>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
  384: *>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
  385: *>          then:
  386: *>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  387: *>          then LRWORK = max( 7, N + 2 * M ). 
  388: *>          3.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
  389: *>       4. If singular values with both the left and the right singular vectors 
  390: *>          are requested, i.e. if     
  391: *>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
  392: *>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
  393: *>          then:
  394: *>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
  395: *>          then LRWORK = max( 7, N + 2 * M ). 
  396: *>          4.2. Otherwise, LRWORK  = max( 7, 2 * N ).    
  397: *> \endverbatim
  398: *>          
  399: *> \param[out] IWORK
  400: *> \verbatim
  401: *>          IWORK is INTEGER array, of dimension:
  402: *>                If LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), then 
  403: *>                the dimension of IWORK is max( 3, 2 * N + M ).
  404: *>                Otherwise, the dimension of IWORK is 
  405: *>                -> max( 3, 2*N ) for full SVD
  406: *>                -> max( 3, N ) for singular values only or singular
  407: *>                   values with one set of singular vectors (left or right)
  408: *>          On exit,
  409: *>          IWORK(1) = the numerical rank determined after the initial
  410: *>                     QR factorization with pivoting. See the descriptions
  411: *>                     of JOBA and JOBR.
  412: *>          IWORK(2) = the number of the computed nonzero singular values
  413: *>          IWORK(3) = if nonzero, a warning message:
  414: *>                     If IWORK(3).EQ.1 then some of the column norms of A
  415: *>                     were denormalized floats. The requested high accuracy
  416: *>                     is not warranted by the data.
  417: *> \endverbatim
  418: *>
  419: *> \param[out] INFO
  420: *> \verbatim
  421: *>          INFO is INTEGER
  422: *>           < 0  : if INFO = -i, then the i-th argument had an illegal value.
  423: *>           = 0 :  successfull exit;
  424: *>           > 0 :  ZGEJSV  did not converge in the maximal allowed number
  425: *>                  of sweeps. The computed values may be inaccurate.
  426: *> \endverbatim
  427: *
  428: *  Authors:
  429: *  ========
  430: *
  431: *> \author Univ. of Tennessee 
  432: *> \author Univ. of California Berkeley 
  433: *> \author Univ. of Colorado Denver 
  434: *> \author NAG Ltd. 
  435: *
  436: *> \date November 2015
  437: *
  438: *> \ingroup complex16GEsing
  439: *
  440: *> \par Further Details:
  441: *  =====================
  442: *>
  443: *> \verbatim
  444: *>
  445: *>  ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3,
  446: *>  ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an
  447: *>  additional row pivoting can be used as a preprocessor, which in some
  448: *>  cases results in much higher accuracy. An example is matrix A with the
  449: *>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
  450: *>  diagonal matrices and C is well-conditioned matrix. In that case, complete
  451: *>  pivoting in the first QR factorizations provides accuracy dependent on the
  452: *>  condition number of C, and independent of D1, D2. Such higher accuracy is
  453: *>  not completely understood theoretically, but it works well in practice.
  454: *>  Further, if A can be written as A = B*D, with well-conditioned B and some
  455: *>  diagonal D, then the high accuracy is guaranteed, both theoretically and
  456: *>  in software, independent of D. For more details see [1], [2].
  457: *>     The computational range for the singular values can be the full range
  458: *>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
  459: *>  & LAPACK routines called by ZGEJSV are implemented to work in that range.
  460: *>  If that is not the case, then the restriction for safe computation with
  461: *>  the singular values in the range of normalized IEEE numbers is that the
  462: *>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
  463: *>  overflow. This code (ZGEJSV) is best used in this restricted range,
  464: *>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
  465: *>  returned as zeros. See JOBR for details on this.
  466: *>     Further, this implementation is somewhat slower than the one described
  467: *>  in [1,2] due to replacement of some non-LAPACK components, and because
  468: *>  the choice of some tuning parameters in the iterative part (ZGESVJ) is
  469: *>  left to the implementer on a particular machine.
  470: *>     The rank revealing QR factorization (in this code: ZGEQP3) should be
  471: *>  implemented as in [3]. We have a new version of ZGEQP3 under development
  472: *>  that is more robust than the current one in LAPACK, with a cleaner cut in
  473: *>  rank defficient cases. It will be available in the SIGMA library [4].
  474: *>  If M is much larger than N, it is obvious that the inital QRF with
  475: *>  column pivoting can be preprocessed by the QRF without pivoting. That
  476: *>  well known trick is not used in ZGEJSV because in some cases heavy row
  477: *>  weighting can be treated with complete pivoting. The overhead in cases
  478: *>  M much larger than N is then only due to pivoting, but the benefits in
  479: *>  terms of accuracy have prevailed. The implementer/user can incorporate
  480: *>  this extra QRF step easily. The implementer can also improve data movement
  481: *>  (matrix transpose, matrix copy, matrix transposed copy) - this
  482: *>  implementation of ZGEJSV uses only the simplest, naive data movement.
  483: *
  484: *> \par Contributors:
  485: *  ==================
  486: *>
  487: *>  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
  488: *
  489: *> \par References:
  490: *  ================
  491: *>
  492: *> \verbatim
  493: *>
  494: * [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
  495: *     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
  496: *     LAPACK Working note 169.
  497: * [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
  498: *     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
  499: *     LAPACK Working note 170.
  500: * [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
  501: *     factorization software - a case study.
  502: *     ACM Trans. math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
  503: *     LAPACK Working note 176.
  504: * [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
  505: *     QSVD, (H,K)-SVD computations.
  506: *     Department of Mathematics, University of Zagreb, 2008.
  507: *> \endverbatim
  508: *
  509: *>  \par Bugs, examples and comments:
  510: *   =================================
  511: *>
  512: *>  Please report all bugs and send interesting examples and/or comments to
  513: *>  drmac@math.hr. Thank you.
  514: *>
  515: *  =====================================================================
  516:       SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
  517:      $                   M, N, A, LDA, SVA, U, LDU, V, LDV,
  518:      $                   CWORK, LWORK, RWORK, LRWORK, IWORK, INFO )
  519: *
  520: *  -- LAPACK computational routine (version 3.6.0) --
  521: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  522: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  523: *     November 2015
  524: *
  525: *     .. Scalar Arguments ..
  526:       IMPLICIT    NONE
  527:       INTEGER     INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
  528: *     ..
  529: *     .. Array Arguments ..
  530:       DOUBLE COMPLEX   A( LDA, * ), U( LDU, * ), V( LDV, * ), 
  531:      $                 CWORK( LWORK )
  532:       DOUBLE PRECISION SVA( N ), RWORK( * )
  533:       INTEGER          IWORK( * )
  534:       CHARACTER*1      JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
  535: *     ..
  536: *
  537: *  ===========================================================================
  538: *
  539: *     .. Local Parameters ..
  540:       DOUBLE PRECISION ZERO,         ONE
  541:       PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
  542:       DOUBLE COMPLEX            CZERO,       CONE
  543:       PARAMETER  ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) )
  544: *     ..
  545: *     .. Local Scalars ..
  546:       DOUBLE COMPLEX   CTEMP
  547:       DOUBLE PRECISION AAPP,    AAQQ,   AATMAX, AATMIN, BIG,    BIG1,   
  548:      $                 COND_OK, CONDR1, CONDR2, ENTRA,  ENTRAT, EPSLN,  
  549:      $                 MAXPRJ,  SCALEM, SCONDA, SFMIN,  SMALL,  TEMP1,  
  550:      $                 USCAL1,  USCAL2, XSC
  551:       INTEGER IERR,   N1,     NR,     NUMRANK,        p, q,   WARNING
  552:       LOGICAL ALMORT, DEFR,   ERREST, GOSCAL, JRACC,  KILL,   LSVEC,
  553:      $        L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
  554:      $        NOSCAL, ROWPIV, RSVEC,  TRANSP
  555: *     ..
  556: *     .. Intrinsic Functions ..
  557:       INTRINSIC ABS,  DCMPLX, DCONJG, DLOG, DMAX1, DMIN1, DFLOAT,
  558:      $          MAX0, MIN0, NINT,  DSQRT
  559: *     ..
  560: *     .. External Functions ..
  561:       DOUBLE PRECISION      DLAMCH, DZNRM2
  562:       INTEGER   IDAMAX
  563:       LOGICAL   LSAME
  564:       EXTERNAL  IDAMAX, LSAME, DLAMCH, DZNRM2
  565: *     ..
  566: *     .. External Subroutines ..
  567:       EXTERNAL  ZCOPY,  ZGELQF, ZGEQP3, ZGEQRF, ZLACPY, ZLASCL,
  568:      $          ZLASET, ZLASSQ, ZLASWP, ZUNGQR, ZUNMLQ,
  569:      $          ZUNMQR, ZPOCON, DSCAL, ZDSCAL, ZSWAP,  ZTRSM,  XERBLA
  570: *
  571:       EXTERNAL  ZGESVJ
  572: *     ..
  573: *
  574: *     Test the input arguments
  575: *
  576: 
  577:       LSVEC  = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' )
  578:       JRACC  = LSAME( JOBV, 'J' )
  579:       RSVEC  = LSAME( JOBV, 'V' ) .OR. JRACC
  580:       ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' )
  581:       L2RANK = LSAME( JOBA, 'R' )
  582:       L2ABER = LSAME( JOBA, 'A' )
  583:       ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' )
  584:       L2TRAN = LSAME( JOBT, 'T' )
  585:       L2KILL = LSAME( JOBR, 'R' )
  586:       DEFR   = LSAME( JOBR, 'N' )
  587:       L2PERT = LSAME( JOBP, 'P' )
  588: *
  589:       IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR.
  590:      $     ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
  591:          INFO = - 1
  592:       ELSE IF ( .NOT.( LSVEC  .OR. LSAME( JOBU, 'N' ) .OR.
  593:      $                             LSAME( JOBU, 'W' )) ) THEN
  594:          INFO = - 2
  595:       ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR.
  596:      $   LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
  597:          INFO = - 3
  598:       ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) )    THEN
  599:          INFO = - 4
  600:       ELSE IF ( .NOT. ( L2TRAN .OR. LSAME( JOBT, 'N' ) ) ) THEN
  601:          INFO = - 5
  602:       ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN
  603:          INFO = - 6
  604:       ELSE IF ( M .LT. 0 ) THEN
  605:          INFO = - 7
  606:       ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN
  607:          INFO = - 8
  608:       ELSE IF ( LDA .LT. M ) THEN
  609:          INFO = - 10
  610:       ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN
  611:          INFO = - 13
  612:       ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN
  613:          INFO = - 15
  614:       ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND.
  615:      $                           (LWORK .LT. 2*N+1)) .OR.
  616:      $ (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND.
  617:      $                         (LWORK .LT. N*N+3*N)) .OR.
  618:      $ (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. 3*N))
  619:      $ .OR.
  620:      $ (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. 3*N))
  621:      $ .OR.
  622:      $ (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND. 
  623:      $                          (LWORK.LT.5*N+2*N*N))
  624:      $ .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND.
  625:      $                          LWORK.LT.4*N+N*N))
  626:      $   THEN
  627:          INFO = - 17
  628:       ELSE IF ( LRWORK.LT. MAX0(N+2*M,7)) THEN
  629:          INFO = -19 
  630:       ELSE
  631: *        #:)
  632:          INFO = 0
  633:       END IF
  634: *
  635:       IF ( INFO .NE. 0 ) THEN
  636: *       #:(
  637:          CALL XERBLA( 'ZGEJSV', - INFO )
  638:          RETURN
  639:       END IF
  640: *
  641: *     Quick return for void matrix (Y3K safe)
  642: * #:)
  643:       IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) RETURN
  644: *
  645: *     Determine whether the matrix U should be M x N or M x M
  646: *
  647:       IF ( LSVEC ) THEN
  648:          N1 = N
  649:          IF ( LSAME( JOBU, 'F' ) ) N1 = M
  650:       END IF
  651: *
  652: *     Set numerical parameters
  653: *
  654: *!    NOTE: Make sure DLAMCH() does not fail on the target architecture.
  655: *
  656:       EPSLN = DLAMCH('Epsilon')
  657:       SFMIN = DLAMCH('SafeMinimum')
  658:       SMALL = SFMIN / EPSLN
  659:       BIG   = DLAMCH('O')
  660: *     BIG   = ONE / SFMIN
  661: *
  662: *     Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
  663: *
  664: *(!)  If necessary, scale SVA() to protect the largest norm from
  665: *     overflow. It is possible that this scaling pushes the smallest
  666: *     column norm left from the underflow threshold (extreme case).
  667: *
  668:       SCALEM  = ONE / DSQRT(DFLOAT(M)*DFLOAT(N))
  669:       NOSCAL  = .TRUE.
  670:       GOSCAL  = .TRUE.
  671:       DO 1874 p = 1, N
  672:          AAPP = ZERO
  673:          AAQQ = ONE
  674:          CALL ZLASSQ( M, A(1,p), 1, AAPP, AAQQ )
  675:          IF ( AAPP .GT. BIG ) THEN
  676:             INFO = - 9
  677:             CALL XERBLA( 'ZGEJSV', -INFO )
  678:             RETURN
  679:          END IF
  680:          AAQQ = DSQRT(AAQQ)
  681:          IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL  ) THEN
  682:             SVA(p)  = AAPP * AAQQ
  683:          ELSE
  684:             NOSCAL  = .FALSE.
  685:             SVA(p)  = AAPP * ( AAQQ * SCALEM )
  686:             IF ( GOSCAL ) THEN
  687:                GOSCAL = .FALSE.
  688:                CALL DSCAL( p-1, SCALEM, SVA, 1 )
  689:             END IF
  690:          END IF
  691:  1874 CONTINUE
  692: *
  693:       IF ( NOSCAL ) SCALEM = ONE
  694: *
  695:       AAPP = ZERO
  696:       AAQQ = BIG
  697:       DO 4781 p = 1, N
  698:          AAPP = DMAX1( AAPP, SVA(p) )
  699:          IF ( SVA(p) .NE. ZERO ) AAQQ = DMIN1( AAQQ, SVA(p) )
  700:  4781 CONTINUE
  701: *
  702: *     Quick return for zero M x N matrix
  703: * #:)
  704:       IF ( AAPP .EQ. ZERO ) THEN
  705:          IF ( LSVEC ) CALL ZLASET( 'G', M, N1, CZERO, CONE, U, LDU )
  706:          IF ( RSVEC ) CALL ZLASET( 'G', N, N,  CZERO, CONE, V, LDV )
  707:          RWORK(1) = ONE
  708:          RWORK(2) = ONE
  709:          IF ( ERREST ) RWORK(3) = ONE
  710:          IF ( LSVEC .AND. RSVEC ) THEN
  711:             RWORK(4) = ONE
  712:             RWORK(5) = ONE
  713:          END IF
  714:          IF ( L2TRAN ) THEN
  715:             RWORK(6) = ZERO
  716:             RWORK(7) = ZERO
  717:          END IF
  718:          IWORK(1) = 0
  719:          IWORK(2) = 0
  720:          IWORK(3) = 0
  721:          RETURN
  722:       END IF
  723: *
  724: *     Issue warning if denormalized column norms detected. Override the
  725: *     high relative accuracy request. Issue licence to kill columns
  726: *     (set them to zero) whose norm is less than sigma_max / BIG (roughly).
  727: * #:(
  728:       WARNING = 0
  729:       IF ( AAQQ .LE. SFMIN ) THEN
  730:          L2RANK = .TRUE.
  731:          L2KILL = .TRUE.
  732:          WARNING = 1
  733:       END IF
  734: *
  735: *     Quick return for one-column matrix
  736: * #:)
  737:       IF ( N .EQ. 1 ) THEN
  738: *
  739:          IF ( LSVEC ) THEN
  740:             CALL ZLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR )
  741:             CALL ZLACPY( 'A', M, 1, A, LDA, U, LDU )
  742: *           computing all M left singular vectors of the M x 1 matrix
  743:             IF ( N1 .NE. N  ) THEN
  744:               CALL ZGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR )
  745:               CALL ZUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR )
  746:               CALL ZCOPY( M, A(1,1), 1, U(1,1), 1 )
  747:             END IF
  748:          END IF
  749:          IF ( RSVEC ) THEN
  750:              V(1,1) = CONE
  751:          END IF
  752:          IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN
  753:             SVA(1)  = SVA(1) / SCALEM
  754:             SCALEM  = ONE
  755:          END IF
  756:          RWORK(1) = ONE / SCALEM
  757:          RWORK(2) = ONE
  758:          IF ( SVA(1) .NE. ZERO ) THEN
  759:             IWORK(1) = 1
  760:             IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN
  761:                IWORK(2) = 1
  762:             ELSE
  763:                IWORK(2) = 0
  764:             END IF
  765:          ELSE
  766:             IWORK(1) = 0
  767:             IWORK(2) = 0
  768:          END IF
  769:          IWORK(3) = 0 
  770:          IF ( ERREST ) RWORK(3) = ONE
  771:          IF ( LSVEC .AND. RSVEC ) THEN
  772:             RWORK(4) = ONE
  773:             RWORK(5) = ONE
  774:          END IF
  775:          IF ( L2TRAN ) THEN
  776:             RWORK(6) = ZERO
  777:             RWORK(7) = ZERO
  778:          END IF
  779:          RETURN
  780: *
  781:       END IF
  782: *
  783:       TRANSP = .FALSE.
  784:       L2TRAN = L2TRAN .AND. ( M .EQ. N )
  785: *
  786:       AATMAX = -ONE
  787:       AATMIN =  BIG
  788:       IF ( ROWPIV .OR. L2TRAN ) THEN
  789: *
  790: *     Compute the row norms, needed to determine row pivoting sequence
  791: *     (in the case of heavily row weighted A, row pivoting is strongly
  792: *     advised) and to collect information needed to compare the
  793: *     structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
  794: *
  795:          IF ( L2TRAN ) THEN
  796:             DO 1950 p = 1, M
  797:                XSC   = ZERO
  798:                TEMP1 = ONE
  799:                CALL ZLASSQ( N, A(p,1), LDA, XSC, TEMP1 )
  800: *              ZLASSQ gets both the ell_2 and the ell_infinity norm
  801: *              in one pass through the vector
  802:                RWORK(M+N+p)  = XSC * SCALEM
  803:                RWORK(N+p)    = XSC * (SCALEM*DSQRT(TEMP1))
  804:                AATMAX = DMAX1( AATMAX, RWORK(N+p) )
  805:                IF (RWORK(N+p) .NE. ZERO) 
  806:      $            AATMIN = DMIN1(AATMIN,RWORK(N+p))
  807:  1950       CONTINUE
  808:          ELSE
  809:             DO 1904 p = 1, M
  810:                RWORK(M+N+p) = SCALEM*ABS( A(p,IDAMAX(N,A(p,1),LDA)) )
  811:                AATMAX = DMAX1( AATMAX, RWORK(M+N+p) )
  812:                AATMIN = DMIN1( AATMIN, RWORK(M+N+p) )
  813:  1904       CONTINUE
  814:          END IF
  815: *
  816:       END IF
  817: *
  818: *     For square matrix A try to determine whether A^*  would be  better
  819: *     input for the preconditioned Jacobi SVD, with faster convergence.
  820: *     The decision is based on an O(N) function of the vector of column
  821: *     and row norms of A, based on the Shannon entropy. This should give
  822: *     the right choice in most cases when the difference actually matters.
  823: *     It may fail and pick the slower converging side.
  824: *
  825:       ENTRA  = ZERO
  826:       ENTRAT = ZERO
  827:       IF ( L2TRAN ) THEN
  828: *
  829:          XSC   = ZERO
  830:          TEMP1 = ONE
  831:          CALL ZLASSQ( N, SVA, 1, XSC, TEMP1 )
  832:          TEMP1 = ONE / TEMP1
  833: *
  834:          ENTRA = ZERO
  835:          DO 1113 p = 1, N
  836:             BIG1  = ( ( SVA(p) / XSC )**2 ) * TEMP1
  837:             IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1)
  838:  1113    CONTINUE
  839:          ENTRA = - ENTRA / DLOG(DFLOAT(N))
  840: *
  841: *        Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
  842: *        It is derived from the diagonal of  A^* * A.  Do the same with the
  843: *        diagonal of A * A^*, compute the entropy of the corresponding
  844: *        probability distribution. Note that A * A^* and A^* * A have the
  845: *        same trace.
  846: *
  847:          ENTRAT = ZERO
  848:          DO 1114 p = N+1, N+M
  849:             BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1
  850:             IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1)
  851:  1114    CONTINUE
  852:          ENTRAT = - ENTRAT / DLOG(DFLOAT(M))
  853: *
  854: *        Analyze the entropies and decide A or A^*. Smaller entropy
  855: *        usually means better input for the algorithm.
  856: *
  857:          TRANSP = ( ENTRAT .LT. ENTRA )
  858:          TRANSP = .TRUE.
  859: *
  860: *        If A^* is better than A, take the adjoint of A.
  861: *
  862:          IF ( TRANSP ) THEN
  863: *           In an optimal implementation, this trivial transpose
  864: *           should be replaced with faster transpose.
  865:             DO 1115 p = 1, N - 1
  866:                A(p,p) = DCONJG(A(p,p)) 
  867:                DO 1116 q = p + 1, N
  868:                    CTEMP = DCONJG(A(q,p))
  869:                   A(q,p) = DCONJG(A(p,q))
  870:                   A(p,q) = CTEMP
  871:  1116          CONTINUE
  872:  1115       CONTINUE
  873:             A(N,N) = DCONJG(A(N,N))
  874:             DO 1117 p = 1, N
  875:                RWORK(M+N+p) = SVA(p)
  876:                SVA(p)      = RWORK(N+p)
  877: *              previously computed row 2-norms are now column 2-norms 
  878: *              of the transposed matrix               
  879:  1117       CONTINUE
  880:             TEMP1  = AAPP
  881:             AAPP   = AATMAX
  882:             AATMAX = TEMP1
  883:             TEMP1  = AAQQ
  884:             AAQQ   = AATMIN
  885:             AATMIN = TEMP1
  886:             KILL   = LSVEC
  887:             LSVEC  = RSVEC
  888:             RSVEC  = KILL
  889:             IF ( LSVEC ) N1 = N 
  890: *
  891:             ROWPIV = .TRUE.
  892:          END IF
  893: *
  894:       END IF
  895: *     END IF L2TRAN
  896: *
  897: *     Scale the matrix so that its maximal singular value remains less
  898: *     than SQRT(BIG) -- the matrix is scaled so that its maximal column
  899: *     has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
  900: *     SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
  901: *     BLAS routines that, in some implementations, are not capable of
  902: *     working in the full interval [SFMIN,BIG] and that they may provoke
  903: *     overflows in the intermediate results. If the singular values spread
  904: *     from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
  905: *     one should use ZGESVJ instead of ZGEJSV.
  906: *
  907:       BIG1   = DSQRT( BIG )
  908:       TEMP1  = DSQRT( BIG / DFLOAT(N) )
  909: *
  910:       CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR )
  911:       IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN
  912:           AAQQ = ( AAQQ / AAPP ) * TEMP1
  913:       ELSE
  914:           AAQQ = ( AAQQ * TEMP1 ) / AAPP
  915:       END IF
  916:       TEMP1 = TEMP1 * SCALEM
  917:       CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR )
  918: *
  919: *     To undo scaling at the end of this procedure, multiply the
  920: *     computed singular values with USCAL2 / USCAL1.
  921: *
  922:       USCAL1 = TEMP1
  923:       USCAL2 = AAPP
  924: *
  925:       IF ( L2KILL ) THEN
  926: *        L2KILL enforces computation of nonzero singular values in
  927: *        the restricted range of condition number of the initial A,
  928: *        sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
  929:          XSC = DSQRT( SFMIN )
  930:       ELSE
  931:          XSC = SMALL
  932: *
  933: *        Now, if the condition number of A is too big,
  934: *        sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
  935: *        as a precaution measure, the full SVD is computed using ZGESVJ
  936: *        with accumulated Jacobi rotations. This provides numerically
  937: *        more robust computation, at the cost of slightly increased run
  938: *        time. Depending on the concrete implementation of BLAS and LAPACK
  939: *        (i.e. how they behave in presence of extreme ill-conditioning) the
  940: *        implementor may decide to remove this switch.
  941:          IF ( ( AAQQ.LT.DSQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN
  942:             JRACC = .TRUE.
  943:          END IF
  944: *
  945:       END IF
  946:       IF ( AAQQ .LT. XSC ) THEN
  947:          DO 700 p = 1, N
  948:             IF ( SVA(p) .LT. XSC ) THEN
  949:                CALL ZLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA )
  950:                SVA(p) = ZERO
  951:             END IF
  952:  700     CONTINUE
  953:       END IF
  954: *
  955: *     Preconditioning using QR factorization with pivoting
  956: *
  957:       IF ( ROWPIV ) THEN
  958: *        Optional row permutation (Bjoerck row pivoting):
  959: *        A result by Cox and Higham shows that the Bjoerck's
  960: *        row pivoting combined with standard column pivoting
  961: *        has similar effect as Powell-Reid complete pivoting.
  962: *        The ell-infinity norms of A are made nonincreasing.
  963:          DO 1952 p = 1, M - 1
  964:             q = IDAMAX( M-p+1, RWORK(M+N+p), 1 ) + p - 1
  965:             IWORK(2*N+p) = q
  966:             IF ( p .NE. q ) THEN
  967:                TEMP1        = RWORK(M+N+p)
  968:                RWORK(M+N+p) = RWORK(M+N+q)
  969:                RWORK(M+N+q) = TEMP1
  970:             END IF
  971:  1952    CONTINUE
  972:          CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(2*N+1), 1 )
  973:       END IF
  974: 
  975: *
  976: *     End of the preparation phase (scaling, optional sorting and
  977: *     transposing, optional flushing of small columns).
  978: *
  979: *     Preconditioning
  980: *
  981: *     If the full SVD is needed, the right singular vectors are computed
  982: *     from a matrix equation, and for that we need theoretical analysis
  983: *     of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
  984: *     In all other cases the first RR QRF can be chosen by other criteria
  985: *     (eg speed by replacing global with restricted window pivoting, such
  986: *     as in xGEQPX from TOMS # 782). Good results will be obtained using
  987: *     xGEQPX with properly (!) chosen numerical parameters.
  988: *     Any improvement of ZGEQP3 improves overal performance of ZGEJSV.
  989: *
  990: *     A * P1 = Q1 * [ R1^* 0]^*:
  991:       DO 1963 p = 1, N
  992: *        .. all columns are free columns
  993:          IWORK(p) = 0
  994:  1963 CONTINUE
  995:       CALL ZGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, 
  996:      $             RWORK, IERR )
  997: *
  998: *     The upper triangular matrix R1 from the first QRF is inspected for
  999: *     rank deficiency and possibilities for deflation, or possible
 1000: *     ill-conditioning. Depending on the user specified flag L2RANK,
 1001: *     the procedure explores possibilities to reduce the numerical
 1002: *     rank by inspecting the computed upper triangular factor. If
 1003: *     L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
 1004: *     A + dA, where ||dA|| <= f(M,N)*EPSLN.
 1005: *
 1006:       NR = 1
 1007:       IF ( L2ABER ) THEN
 1008: *        Standard absolute error bound suffices. All sigma_i with
 1009: *        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
 1010: *        agressive enforcement of lower numerical rank by introducing a
 1011: *        backward error of the order of N*EPSLN*||A||.
 1012:          TEMP1 = DSQRT(DFLOAT(N))*EPSLN
 1013:          DO 3001 p = 2, N
 1014:             IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN
 1015:                NR = NR + 1
 1016:             ELSE
 1017:                GO TO 3002
 1018:             END IF
 1019:  3001    CONTINUE
 1020:  3002    CONTINUE
 1021:       ELSE IF ( L2RANK ) THEN
 1022: *        .. similarly as above, only slightly more gentle (less agressive).
 1023: *        Sudden drop on the diagonal of R1 is used as the criterion for
 1024: *        close-to-rank-defficient.
 1025:          TEMP1 = DSQRT(SFMIN)
 1026:          DO 3401 p = 2, N
 1027:             IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR.
 1028:      $           ( ABS(A(p,p)) .LT. SMALL ) .OR.
 1029:      $           ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
 1030:             NR = NR + 1
 1031:  3401    CONTINUE
 1032:  3402    CONTINUE
 1033: *
 1034:       ELSE
 1035: *        The goal is high relative accuracy. However, if the matrix
 1036: *        has high scaled condition number the relative accuracy is in
 1037: *        general not feasible. Later on, a condition number estimator
 1038: *        will be deployed to estimate the scaled condition number.
 1039: *        Here we just remove the underflowed part of the triangular
 1040: *        factor. This prevents the situation in which the code is
 1041: *        working hard to get the accuracy not warranted by the data.
 1042:          TEMP1  = DSQRT(SFMIN)
 1043:          DO 3301 p = 2, N
 1044:             IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR.
 1045:      $           ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
 1046:             NR = NR + 1
 1047:  3301    CONTINUE
 1048:  3302    CONTINUE
 1049: *
 1050:       END IF
 1051: *
 1052:       ALMORT = .FALSE.
 1053:       IF ( NR .EQ. N ) THEN
 1054:          MAXPRJ = ONE
 1055:          DO 3051 p = 2, N
 1056:             TEMP1  = ABS(A(p,p)) / SVA(IWORK(p))
 1057:             MAXPRJ = DMIN1( MAXPRJ, TEMP1 )
 1058:  3051    CONTINUE
 1059:          IF ( MAXPRJ**2 .GE. ONE - DFLOAT(N)*EPSLN ) ALMORT = .TRUE.
 1060:       END IF
 1061: *
 1062: *
 1063:       SCONDA = - ONE
 1064:       CONDR1 = - ONE
 1065:       CONDR2 = - ONE
 1066: *
 1067:       IF ( ERREST ) THEN
 1068:          IF ( N .EQ. NR ) THEN
 1069:             IF ( RSVEC ) THEN
 1070: *              .. V is available as workspace
 1071:                CALL ZLACPY( 'U', N, N, A, LDA, V, LDV )
 1072:                DO 3053 p = 1, N
 1073:                   TEMP1 = SVA(IWORK(p))
 1074:                   CALL ZDSCAL( p, ONE/TEMP1, V(1,p), 1 )
 1075:  3053          CONTINUE
 1076:                CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1,
 1077:      $              CWORK(N+1), RWORK, IERR )
 1078: *          
 1079:             ELSE IF ( LSVEC ) THEN
 1080: *              .. U is available as workspace
 1081:                CALL ZLACPY( 'U', N, N, A, LDA, U, LDU )
 1082:                DO 3054 p = 1, N
 1083:                   TEMP1 = SVA(IWORK(p))
 1084:                   CALL ZDSCAL( p, ONE/TEMP1, U(1,p), 1 )
 1085:  3054          CONTINUE
 1086:                CALL ZPOCON( 'U', N, U, LDU, ONE, TEMP1,
 1087:      $              CWORK(N+1), RWORK, IERR )
 1088:             ELSE
 1089:                CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
 1090:                DO 3052 p = 1, N
 1091:                   TEMP1 = SVA(IWORK(p))
 1092:                   CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
 1093:  3052          CONTINUE
 1094: *           .. the columns of R are scaled to have unit Euclidean lengths.
 1095:                CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
 1096:      $              CWORK(N+N*N+1), RWORK, IERR )
 1097: *              
 1098:             END IF
 1099:             SCONDA = ONE / DSQRT(TEMP1)
 1100: *           SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
 1101: *           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
 1102:          ELSE
 1103:             SCONDA = - ONE
 1104:          END IF
 1105:       END IF
 1106: *
 1107:       L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. DSQRT(BIG1) )
 1108: *     If there is no violent scaling, artificial perturbation is not needed.
 1109: *
 1110: *     Phase 3:
 1111: *
 1112:       IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
 1113: *
 1114: *         Singular Values only
 1115: *
 1116: *         .. transpose A(1:NR,1:N)
 1117:          DO 1946 p = 1, MIN0( N-1, NR )
 1118:             CALL ZCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 )
 1119:             CALL ZLACGV( N-p+1, A(p,p), 1 )
 1120:  1946    CONTINUE
 1121:          IF ( NR .EQ. N ) A(N,N) = DCONJG(A(N,N))        
 1122: *
 1123: *        The following two DO-loops introduce small relative perturbation
 1124: *        into the strict upper triangle of the lower triangular matrix.
 1125: *        Small entries below the main diagonal are also changed.
 1126: *        This modification is useful if the computing environment does not
 1127: *        provide/allow FLUSH TO ZERO underflow, for it prevents many
 1128: *        annoying denormalized numbers in case of strongly scaled matrices.
 1129: *        The perturbation is structured so that it does not introduce any
 1130: *        new perturbation of the singular values, and it does not destroy
 1131: *        the job done by the preconditioner.
 1132: *        The licence for this perturbation is in the variable L2PERT, which
 1133: *        should be .FALSE. if FLUSH TO ZERO underflow is active.
 1134: *
 1135:          IF ( .NOT. ALMORT ) THEN
 1136: *
 1137:             IF ( L2PERT ) THEN
 1138: *              XSC = SQRT(SMALL)
 1139:                XSC = EPSLN / DFLOAT(N)
 1140:                DO 4947 q = 1, NR
 1141:                   CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO)
 1142:                   DO 4949 p = 1, N
 1143:                      IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) )
 1144:      $                    .OR. ( p .LT. q ) )
 1145: *     $                     A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
 1146:      $                     A(p,q) = CTEMP
 1147:  4949             CONTINUE
 1148:  4947          CONTINUE
 1149:             ELSE
 1150:                CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA )
 1151:             END IF
 1152: *
 1153: *            .. second preconditioning using the QR factorization
 1154: *
 1155:             CALL ZGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR )
 1156: *
 1157: *           .. and transpose upper to lower triangular
 1158:             DO 1948 p = 1, NR - 1
 1159:                CALL ZCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 )
 1160:                CALL ZLACGV( NR-p+1, A(p,p), 1 )
 1161:  1948       CONTINUE
 1162: *
 1163:       END IF
 1164: *
 1165: *           Row-cyclic Jacobi SVD algorithm with column pivoting
 1166: *
 1167: *           .. again some perturbation (a "background noise") is added
 1168: *           to drown denormals
 1169:             IF ( L2PERT ) THEN
 1170: *              XSC = SQRT(SMALL)
 1171:                XSC = EPSLN / DFLOAT(N)
 1172:                DO 1947 q = 1, NR
 1173:                   CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO)
 1174:                   DO 1949 p = 1, NR
 1175:                      IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) )
 1176:      $                       .OR. ( p .LT. q ) )
 1177: *     $                   A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
 1178:      $                   A(p,q) = CTEMP 
 1179:  1949             CONTINUE
 1180:  1947          CONTINUE
 1181:             ELSE
 1182:                CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA )
 1183:             END IF
 1184: *
 1185: *           .. and one-sided Jacobi rotations are started on a lower
 1186: *           triangular matrix (plus perturbation which is ignored in
 1187: *           the part which destroys triangular form (confusing?!))
 1188: *
 1189:             CALL ZGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA,
 1190:      $                N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO )
 1191: *
 1192:             SCALEM  = RWORK(1)
 1193:             NUMRANK = NINT(RWORK(2))
 1194: *
 1195: *
 1196:       ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN
 1197: *
 1198: *        -> Singular Values and Right Singular Vectors <-
 1199: *
 1200:          IF ( ALMORT ) THEN
 1201: *
 1202: *           .. in this case NR equals N
 1203:             DO 1998 p = 1, NR
 1204:                CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1205:                CALL ZLACGV( N-p+1, V(p,p), 1 )
 1206:  1998       CONTINUE
 1207:             CALL ZLASET( 'Upper', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV )
 1208: *
 1209:             CALL ZGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA,
 1210:      $                  CWORK, LWORK, RWORK, LRWORK, INFO )
 1211:             SCALEM  = RWORK(1)
 1212:             NUMRANK = NINT(RWORK(2))
 1213: 
 1214:          ELSE
 1215: *
 1216: *        .. two more QR factorizations ( one QRF is not enough, two require
 1217: *        accumulated product of Jacobi rotations, three are perfect )
 1218: *
 1219:             CALL ZLASET( 'Lower', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA )
 1220:             CALL ZGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR)
 1221:             CALL ZLACPY( 'Lower', NR, NR, A, LDA, V, LDV )
 1222:             CALL ZLASET( 'Upper', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV )
 1223:             CALL ZGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1224:      $                   LWORK-2*N, IERR )
 1225:             DO 8998 p = 1, NR
 1226:                CALL ZCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 )
 1227:                CALL ZLACGV( NR-p+1, V(p,p), 1 ) 
 1228:  8998       CONTINUE
 1229:             CALL ZLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
 1230: *
 1231:             CALL ZGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U,
 1232:      $                  LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO )
 1233:             SCALEM  = RWORK(1)
 1234:             NUMRANK = NINT(RWORK(2))
 1235:             IF ( NR .LT. N ) THEN
 1236:                CALL ZLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1),  LDV )
 1237:                CALL ZLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1),  LDV )
 1238:                CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV )
 1239:             END IF
 1240: *
 1241:          CALL ZUNMLQ( 'Left', 'C', N, N, NR, A, LDA, CWORK,
 1242:      $               V, LDV, CWORK(N+1), LWORK-N, IERR )
 1243: *
 1244:          END IF
 1245: *
 1246:          DO 8991 p = 1, N
 1247:             CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
 1248:  8991    CONTINUE
 1249:          CALL ZLACPY( 'All', N, N, A, LDA, V, LDV )
 1250: *
 1251:          IF ( TRANSP ) THEN
 1252:             CALL ZLACPY( 'All', N, N, V, LDV, U, LDU )
 1253:          END IF
 1254: *
 1255:       ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN
 1256: *
 1257: *        .. Singular Values and Left Singular Vectors                 ..
 1258: *
 1259: *        .. second preconditioning step to avoid need to accumulate
 1260: *        Jacobi rotations in the Jacobi iterations.
 1261:          DO 1965 p = 1, NR
 1262:             CALL ZCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 )
 1263:             CALL ZLACGV( N-p+1, U(p,p), 1 )
 1264:  1965    CONTINUE
 1265:          CALL ZLASET( 'Upper', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1266: *
 1267:          CALL ZGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1),
 1268:      $              LWORK-2*N, IERR )
 1269: *
 1270:          DO 1967 p = 1, NR - 1
 1271:             CALL ZCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 )
 1272:             CALL ZLACGV( N-p+1, U(p,p), 1 )            
 1273:  1967    CONTINUE
 1274:          CALL ZLASET( 'Upper', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1275: *
 1276:          CALL ZGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A,
 1277:      $        LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO )
 1278:          SCALEM  = RWORK(1)
 1279:          NUMRANK = NINT(RWORK(2))
 1280: *
 1281:          IF ( NR .LT. M ) THEN
 1282:             CALL ZLASET( 'A',  M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU )
 1283:             IF ( NR .LT. N1 ) THEN
 1284:                CALL ZLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU )
 1285:                CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU )
 1286:             END IF
 1287:          END IF
 1288: *
 1289:          CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1290:      $               LDU, CWORK(N+1), LWORK-N, IERR )
 1291: *
 1292:          IF ( ROWPIV )
 1293:      $       CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1294: *
 1295:          DO 1974 p = 1, N1
 1296:             XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1297:             CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1298:  1974    CONTINUE
 1299: *
 1300:          IF ( TRANSP ) THEN
 1301:             CALL ZLACPY( 'All', N, N, U, LDU, V, LDV )
 1302:          END IF
 1303: *
 1304:       ELSE
 1305: *
 1306: *        .. Full SVD ..
 1307: *
 1308:          IF ( .NOT. JRACC ) THEN
 1309: *
 1310:          IF ( .NOT. ALMORT ) THEN
 1311: *
 1312: *           Second Preconditioning Step (QRF [with pivoting])
 1313: *           Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
 1314: *           equivalent to an LQF CALL. Since in many libraries the QRF
 1315: *           seems to be better optimized than the LQF, we do explicit
 1316: *           transpose and use the QRF. This is subject to changes in an
 1317: *           optimized implementation of ZGEJSV.
 1318: *
 1319:             DO 1968 p = 1, NR
 1320:                CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1321:                CALL ZLACGV( N-p+1, V(p,p), 1 )
 1322:  1968       CONTINUE
 1323: *
 1324: *           .. the following two loops perturb small entries to avoid
 1325: *           denormals in the second QR factorization, where they are
 1326: *           as good as zeros. This is done to avoid painfully slow
 1327: *           computation with denormals. The relative size of the perturbation
 1328: *           is a parameter that can be changed by the implementer.
 1329: *           This perturbation device will be obsolete on machines with
 1330: *           properly implemented arithmetic.
 1331: *           To switch it off, set L2PERT=.FALSE. To remove it from  the
 1332: *           code, remove the action under L2PERT=.TRUE., leave the ELSE part.
 1333: *           The following two loops should be blocked and fused with the
 1334: *           transposed copy above.
 1335: *
 1336:             IF ( L2PERT ) THEN
 1337:                XSC = DSQRT(SMALL)
 1338:                DO 2969 q = 1, NR
 1339:                   CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO)
 1340:                   DO 2968 p = 1, N
 1341:                      IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 )
 1342:      $                   .OR. ( p .LT. q ) )
 1343: *     $                   V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
 1344:      $                   V(p,q) = CTEMP      
 1345:                      IF ( p .LT. q ) V(p,q) = - V(p,q)
 1346:  2968             CONTINUE
 1347:  2969          CONTINUE
 1348:             ELSE
 1349:                CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV )
 1350:             END IF
 1351: *
 1352: *           Estimate the row scaled condition number of R1
 1353: *           (If R1 is rectangular, N > NR, then the condition number
 1354: *           of the leading NR x NR submatrix is estimated.)
 1355: *
 1356:             CALL ZLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR )
 1357:             DO 3950 p = 1, NR
 1358:                TEMP1 = DZNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1)
 1359:                CALL ZDSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1)
 1360:  3950       CONTINUE
 1361:             CALL ZPOCON('Lower',NR,CWORK(2*N+1),NR,ONE,TEMP1,
 1362:      $                   CWORK(2*N+NR*NR+1),RWORK,IERR)
 1363:             CONDR1 = ONE / DSQRT(TEMP1)
 1364: *           .. here need a second oppinion on the condition number
 1365: *           .. then assume worst case scenario
 1366: *           R1 is OK for inverse <=> CONDR1 .LT. DFLOAT(N)
 1367: *           more conservative    <=> CONDR1 .LT. SQRT(DFLOAT(N))
 1368: *
 1369:             COND_OK = DSQRT(DSQRT(DFLOAT(NR)))
 1370: *[TP]       COND_OK is a tuning parameter.
 1371: *
 1372:             IF ( CONDR1 .LT. COND_OK ) THEN
 1373: *              .. the second QRF without pivoting. Note: in an optimized
 1374: *              implementation, this QRF should be implemented as the QRF
 1375: *              of a lower triangular matrix.
 1376: *              R1^* = Q2 * R2
 1377:                CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1378:      $              LWORK-2*N, IERR )
 1379: *
 1380:                IF ( L2PERT ) THEN
 1381:                   XSC = DSQRT(SMALL)/EPSLN
 1382:                   DO 3959 p = 2, NR
 1383:                      DO 3958 q = 1, p - 1
 1384:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1385:      $                              ZERO)
 1386:                         IF ( ABS(V(q,p)) .LE. TEMP1 )
 1387: *     $                     V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
 1388:      $                     V(q,p) = CTEMP    
 1389:  3958                CONTINUE
 1390:  3959             CONTINUE
 1391:                END IF
 1392: *
 1393:                IF ( NR .NE. N )
 1394:      $         CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N )
 1395: *              .. save ...
 1396: *
 1397: *           .. this transposed copy should be better than naive
 1398:                DO 1969 p = 1, NR - 1
 1399:                   CALL ZCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 )
 1400:                   CALL ZLACGV(NR-p+1, V(p,p), 1 )
 1401:  1969          CONTINUE
 1402:                V(NR,NR)=DCONJG(V(NR,NR))   
 1403: *
 1404:                CONDR2 = CONDR1
 1405: *
 1406:             ELSE
 1407: *
 1408: *              .. ill-conditioned case: second QRF with pivoting
 1409: *              Note that windowed pivoting would be equaly good
 1410: *              numerically, and more run-time efficient. So, in
 1411: *              an optimal implementation, the next call to ZGEQP3
 1412: *              should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
 1413: *              with properly (carefully) chosen parameters.
 1414: *
 1415: *              R1^* * P2 = Q2 * R2
 1416:                DO 3003 p = 1, NR
 1417:                   IWORK(N+p) = 0
 1418:  3003          CONTINUE
 1419:                CALL ZGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1),
 1420:      $                  CWORK(2*N+1), LWORK-2*N, RWORK, IERR )
 1421: **               CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1422: **     $              LWORK-2*N, IERR )
 1423:                IF ( L2PERT ) THEN
 1424:                   XSC = DSQRT(SMALL)
 1425:                   DO 3969 p = 2, NR
 1426:                      DO 3968 q = 1, p - 1
 1427:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1428:      $                                ZERO)
 1429:                         IF ( ABS(V(q,p)) .LE. TEMP1 )
 1430: *     $                     V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
 1431:      $                     V(q,p) = CTEMP                     
 1432:  3968                CONTINUE
 1433:  3969             CONTINUE
 1434:                END IF
 1435: *
 1436:                CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N )
 1437: *
 1438:                IF ( L2PERT ) THEN
 1439:                   XSC = DSQRT(SMALL)
 1440:                   DO 8970 p = 2, NR
 1441:                      DO 8971 q = 1, p - 1
 1442:                         CTEMP=DCMPLX(XSC*DMIN1(ABS(V(p,p)),ABS(V(q,q))),
 1443:      $                               ZERO)
 1444: *                        V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
 1445:                         V(p,q) = - CTEMP      
 1446:  8971                CONTINUE
 1447:  8970             CONTINUE
 1448:                ELSE
 1449:                   CALL ZLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV )
 1450:                END IF
 1451: *              Now, compute R2 = L3 * Q3, the LQ factorization.
 1452:                CALL ZGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1),
 1453:      $               CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
 1454: *              .. and estimate the condition number
 1455:                CALL ZLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR )
 1456:                DO 4950 p = 1, NR
 1457:                   TEMP1 = DZNRM2( p, CWORK(2*N+N*NR+NR+p), NR )
 1458:                   CALL ZDSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR )
 1459:  4950          CONTINUE
 1460:                CALL ZPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1,
 1461:      $              CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) 
 1462:                CONDR2 = ONE / DSQRT(TEMP1)
 1463: *
 1464: *
 1465:                IF ( CONDR2 .GE. COND_OK ) THEN
 1466: *                 .. save the Householder vectors used for Q3
 1467: *                 (this overwrittes the copy of R2, as it will not be
 1468: *                 needed in this branch, but it does not overwritte the
 1469: *                 Huseholder vectors of Q2.).
 1470:                   CALL ZLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N )
 1471: *                 .. and the rest of the information on Q3 is in
 1472: *                 WORK(2*N+N*NR+1:2*N+N*NR+N)
 1473:                END IF
 1474: *
 1475:             END IF
 1476: *
 1477:             IF ( L2PERT ) THEN
 1478:                XSC = DSQRT(SMALL)
 1479:                DO 4968 q = 2, NR
 1480:                   CTEMP = XSC * V(q,q)
 1481:                   DO 4969 p = 1, q - 1
 1482: *                     V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
 1483:                      V(p,q) = - CTEMP
 1484:  4969             CONTINUE
 1485:  4968          CONTINUE
 1486:             ELSE
 1487:                CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV )
 1488:             END IF
 1489: *
 1490: *        Second preconditioning finished; continue with Jacobi SVD
 1491: *        The input matrix is lower trinagular.
 1492: *
 1493: *        Recover the right singular vectors as solution of a well
 1494: *        conditioned triangular matrix equation.
 1495: *
 1496:             IF ( CONDR1 .LT. COND_OK ) THEN
 1497: *
 1498:                CALL ZGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU,
 1499:      $              CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK,
 1500:      $              LRWORK, INFO )
 1501:                SCALEM  = RWORK(1)
 1502:                NUMRANK = NINT(RWORK(2))
 1503:                DO 3970 p = 1, NR
 1504:                   CALL ZCOPY(  NR, V(1,p), 1, U(1,p), 1 )
 1505:                   CALL ZDSCAL( NR, SVA(p),    V(1,p), 1 )
 1506:  3970          CONTINUE
 1507: 
 1508: *        .. pick the right matrix equation and solve it
 1509: *
 1510:                IF ( NR .EQ. N ) THEN
 1511: * :))             .. best case, R1 is inverted. The solution of this matrix
 1512: *                 equation is Q2*V2 = the product of the Jacobi rotations
 1513: *                 used in ZGESVJ, premultiplied with the orthogonal matrix
 1514: *                 from the second QR factorization.
 1515:                   CALL ZTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV)
 1516:                ELSE
 1517: *                 .. R1 is well conditioned, but non-square. Adjoint of R2
 1518: *                 is inverted to get the product of the Jacobi rotations
 1519: *                 used in ZGESVJ. The Q-factor from the second QR
 1520: *                 factorization is then built in explicitly.
 1521:                   CALL ZTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1),
 1522:      $                 N,V,LDV)
 1523:                   IF ( NR .LT. N ) THEN
 1524:                    CALL ZLASET('A',N-NR,NR,ZERO,CZERO,V(NR+1,1),LDV)
 1525:                    CALL ZLASET('A',NR,N-NR,ZERO,CZERO,V(1,NR+1),LDV)
 1526:                    CALL ZLASET('A',N-NR,N-NR,ZERO,CONE,V(NR+1,NR+1),LDV)
 1527:                   END IF
 1528:                   CALL ZUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1529:      $                V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
 1530:                END IF
 1531: *
 1532:             ELSE IF ( CONDR2 .LT. COND_OK ) THEN
 1533: *
 1534: *              The matrix R2 is inverted. The solution of the matrix equation
 1535: *              is Q3^* * V3 = the product of the Jacobi rotations (appplied to
 1536: *              the lower triangular L3 from the LQ factorization of
 1537: *              R2=L3*Q3), pre-multiplied with the transposed Q3.
 1538:                CALL ZGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U,
 1539:      $              LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, 
 1540:      $          RWORK, LRWORK, INFO )
 1541:                SCALEM  = RWORK(1)
 1542:                NUMRANK = NINT(RWORK(2))
 1543:                DO 3870 p = 1, NR
 1544:                   CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 )
 1545:                   CALL ZDSCAL( NR, SVA(p),    U(1,p), 1 )
 1546:  3870          CONTINUE
 1547:                CALL ZTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N,
 1548:      $                    U,LDU)
 1549: *              .. apply the permutation from the second QR factorization
 1550:                DO 873 q = 1, NR
 1551:                   DO 872 p = 1, NR
 1552:                      CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
 1553:  872              CONTINUE
 1554:                   DO 874 p = 1, NR
 1555:                      U(p,q) = CWORK(2*N+N*NR+NR+p)
 1556:  874              CONTINUE
 1557:  873           CONTINUE
 1558:                IF ( NR .LT. N ) THEN
 1559:                   CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV )
 1560:                   CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV )
 1561:                   CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV)
 1562:                END IF
 1563:                CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1564:      $              V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1565:             ELSE
 1566: *              Last line of defense.
 1567: * #:(          This is a rather pathological case: no scaled condition
 1568: *              improvement after two pivoted QR factorizations. Other
 1569: *              possibility is that the rank revealing QR factorization
 1570: *              or the condition estimator has failed, or the COND_OK
 1571: *              is set very close to ONE (which is unnecessary). Normally,
 1572: *              this branch should never be executed, but in rare cases of
 1573: *              failure of the RRQR or condition estimator, the last line of
 1574: *              defense ensures that ZGEJSV completes the task.
 1575: *              Compute the full SVD of L3 using ZGESVJ with explicit
 1576: *              accumulation of Jacobi rotations.
 1577:                CALL ZGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U,
 1578:      $              LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, 
 1579:      $                         RWORK, LRWORK, INFO )
 1580:                SCALEM  = RWORK(1)
 1581:                NUMRANK = NINT(RWORK(2))
 1582:                IF ( NR .LT. N ) THEN
 1583:                   CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV )
 1584:                   CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV )
 1585:                   CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV)
 1586:                END IF
 1587:                CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1588:      $              V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1589: *
 1590:                CALL ZUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N,
 1591:      $              CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1),
 1592:      $              LWORK-2*N-N*NR-NR, IERR )
 1593:                DO 773 q = 1, NR
 1594:                   DO 772 p = 1, NR
 1595:                      CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
 1596:  772              CONTINUE
 1597:                   DO 774 p = 1, NR
 1598:                      U(p,q) = CWORK(2*N+N*NR+NR+p)
 1599:  774              CONTINUE
 1600:  773           CONTINUE
 1601: *
 1602:             END IF
 1603: *
 1604: *           Permute the rows of V using the (column) permutation from the
 1605: *           first QRF. Also, scale the columns to make them unit in
 1606: *           Euclidean norm. This applies to all cases.
 1607: *
 1608:             TEMP1 = DSQRT(DFLOAT(N)) * EPSLN
 1609:             DO 1972 q = 1, N
 1610:                DO 972 p = 1, N
 1611:                   CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
 1612:   972          CONTINUE
 1613:                DO 973 p = 1, N
 1614:                   V(p,q) = CWORK(2*N+N*NR+NR+p)
 1615:   973          CONTINUE
 1616:                XSC = ONE / DZNRM2( N, V(1,q), 1 )
 1617:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1618:      $           CALL ZDSCAL( N, XSC, V(1,q), 1 )
 1619:  1972       CONTINUE
 1620: *           At this moment, V contains the right singular vectors of A.
 1621: *           Next, assemble the left singular vector matrix U (M x N).
 1622:             IF ( NR .LT. M ) THEN
 1623:                CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU)
 1624:                IF ( NR .LT. N1 ) THEN
 1625:                   CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU)
 1626:                   CALL ZLASET('A',M-NR,N1-NR,CZERO,CONE,
 1627:      $                        U(NR+1,NR+1),LDU)
 1628:                END IF
 1629:             END IF
 1630: *
 1631: *           The Q matrix from the first QRF is built into the left singular
 1632: *           matrix U. This applies to all cases.
 1633: *
 1634:             CALL ZUNMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, CWORK, U,
 1635:      $           LDU, CWORK(N+1), LWORK-N, IERR )
 1636: 
 1637: *           The columns of U are normalized. The cost is O(M*N) flops.
 1638:             TEMP1 = DSQRT(DFLOAT(M)) * EPSLN
 1639:             DO 1973 p = 1, NR
 1640:                XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1641:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1642:      $          CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1643:  1973       CONTINUE
 1644: *
 1645: *           If the initial QRF is computed with row pivoting, the left
 1646: *           singular vectors must be adjusted.
 1647: *
 1648:             IF ( ROWPIV )
 1649:      $          CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1650: *
 1651:          ELSE
 1652: *
 1653: *        .. the initial matrix A has almost orthogonal columns and
 1654: *        the second QRF is not needed
 1655: *
 1656:             CALL ZLACPY( 'Upper', N, N, A, LDA, CWORK(N+1), N )
 1657:             IF ( L2PERT ) THEN
 1658:                XSC = DSQRT(SMALL)
 1659:                DO 5970 p = 2, N
 1660:                   CTEMP = XSC * CWORK( N + (p-1)*N + p )
 1661:                   DO 5971 q = 1, p - 1
 1662: *                     CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
 1663: *     $                                        ABS(CWORK(N+(p-1)*N+q)) )
 1664:                      CWORK(N+(q-1)*N+p)=-CTEMP           
 1665:  5971             CONTINUE
 1666:  5970          CONTINUE
 1667:             ELSE
 1668:                CALL ZLASET( 'Lower',N-1,N-1,CZERO,CZERO,CWORK(N+2),N )
 1669:             END IF
 1670: *
 1671:             CALL ZGESVJ( 'Upper', 'U', 'N', N, N, CWORK(N+1), N, SVA,
 1672:      $           N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, 
 1673:      $       INFO )
 1674: *
 1675:             SCALEM  = RWORK(1)
 1676:             NUMRANK = NINT(RWORK(2))
 1677:             DO 6970 p = 1, N
 1678:                CALL ZCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 )
 1679:                CALL ZDSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 )
 1680:  6970       CONTINUE
 1681: *
 1682:             CALL ZTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N,
 1683:      $           CONE, A, LDA, CWORK(N+1), N )
 1684:             DO 6972 p = 1, N
 1685:                CALL ZCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV )
 1686:  6972       CONTINUE
 1687:             TEMP1 = DSQRT(DFLOAT(N))*EPSLN
 1688:             DO 6971 p = 1, N
 1689:                XSC = ONE / DZNRM2( N, V(1,p), 1 )
 1690:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1691:      $            CALL ZDSCAL( N, XSC, V(1,p), 1 )
 1692:  6971       CONTINUE
 1693: *
 1694: *           Assemble the left singular vector matrix U (M x N).
 1695: *
 1696:             IF ( N .LT. M ) THEN
 1697:                CALL ZLASET( 'A',  M-N, N, CZERO, CZERO, U(N+1,1), LDU )
 1698:                IF ( N .LT. N1 ) THEN
 1699:                   CALL ZLASET('A',N,  N1-N, CZERO, CZERO,  U(1,N+1),LDU)
 1700:                   CALL ZLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU)
 1701:                END IF
 1702:             END IF
 1703:             CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1704:      $           LDU, CWORK(N+1), LWORK-N, IERR )
 1705:             TEMP1 = DSQRT(DFLOAT(M))*EPSLN
 1706:             DO 6973 p = 1, N1
 1707:                XSC = ONE / DZNRM2( M, U(1,p), 1 )
 1708:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1709:      $            CALL ZDSCAL( M, XSC, U(1,p), 1 )
 1710:  6973       CONTINUE
 1711: *
 1712:             IF ( ROWPIV )
 1713:      $         CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1714: *
 1715:          END IF
 1716: *
 1717: *        end of the  >> almost orthogonal case <<  in the full SVD
 1718: *
 1719:          ELSE
 1720: *
 1721: *        This branch deploys a preconditioned Jacobi SVD with explicitly
 1722: *        accumulated rotations. It is included as optional, mainly for
 1723: *        experimental purposes. It does perfom well, and can also be used.
 1724: *        In this implementation, this branch will be automatically activated
 1725: *        if the  condition number sigma_max(A) / sigma_min(A) is predicted
 1726: *        to be greater than the overflow threshold. This is because the
 1727: *        a posteriori computation of the singular vectors assumes robust
 1728: *        implementation of BLAS and some LAPACK procedures, capable of working
 1729: *        in presence of extreme values. Since that is not always the case, ...
 1730: *
 1731:          DO 7968 p = 1, NR
 1732:             CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
 1733:             CALL ZLACGV( N-p+1, V(p,p), 1 )
 1734:  7968    CONTINUE
 1735: *
 1736:          IF ( L2PERT ) THEN
 1737:             XSC = DSQRT(SMALL/EPSLN)
 1738:             DO 5969 q = 1, NR
 1739:                CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO)
 1740:                DO 5968 p = 1, N
 1741:                   IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 )
 1742:      $                .OR. ( p .LT. q ) )
 1743: *     $                V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
 1744:      $                V(p,q) = CTEMP        
 1745:                   IF ( p .LT. q ) V(p,q) = - V(p,q)
 1746:  5968          CONTINUE
 1747:  5969       CONTINUE
 1748:          ELSE
 1749:             CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV )
 1750:          END IF
 1751: 
 1752:          CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
 1753:      $        LWORK-2*N, IERR )
 1754:          CALL ZLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N )
 1755: *
 1756:          DO 7969 p = 1, NR
 1757:             CALL ZCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 )
 1758:             CALL ZLACGV( NR-p+1, U(p,p), 1 )
 1759:  7969    CONTINUE
 1760: 
 1761:          IF ( L2PERT ) THEN
 1762:             XSC = DSQRT(SMALL/EPSLN)
 1763:             DO 9970 q = 2, NR
 1764:                DO 9971 p = 1, q - 1
 1765:                   CTEMP = DCMPLX(XSC * DMIN1(ABS(U(p,p)),ABS(U(q,q))),
 1766:      $                            ZERO)
 1767: *                  U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
 1768:                   U(p,q) = - CTEMP     
 1769:  9971          CONTINUE
 1770:  9970       CONTINUE
 1771:          ELSE
 1772:             CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU )
 1773:          END IF
 1774: 
 1775:          CALL ZGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA,
 1776:      $        N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, 
 1777:      $         RWORK, LRWORK, INFO )
 1778:          SCALEM  = RWORK(1)
 1779:          NUMRANK = NINT(RWORK(2))
 1780: 
 1781:          IF ( NR .LT. N ) THEN
 1782:             CALL ZLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV )
 1783:             CALL ZLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV )
 1784:             CALL ZLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
 1785:          END IF
 1786: 
 1787:          CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1),
 1788:      $        V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
 1789: *
 1790: *           Permute the rows of V using the (column) permutation from the
 1791: *           first QRF. Also, scale the columns to make them unit in
 1792: *           Euclidean norm. This applies to all cases.
 1793: *
 1794:             TEMP1 = DSQRT(DFLOAT(N)) * EPSLN
 1795:             DO 7972 q = 1, N
 1796:                DO 8972 p = 1, N
 1797:                   CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
 1798:  8972          CONTINUE
 1799:                DO 8973 p = 1, N
 1800:                   V(p,q) = CWORK(2*N+N*NR+NR+p)
 1801:  8973          CONTINUE
 1802:                XSC = ONE / DZNRM2( N, V(1,q), 1 )
 1803:                IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
 1804:      $           CALL ZDSCAL( N, XSC, V(1,q), 1 )
 1805:  7972       CONTINUE
 1806: *
 1807: *           At this moment, V contains the right singular vectors of A.
 1808: *           Next, assemble the left singular vector matrix U (M x N).
 1809: *
 1810:          IF ( NR .LT. M ) THEN
 1811:             CALL ZLASET( 'A',  M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU )
 1812:             IF ( NR .LT. N1 ) THEN
 1813:                CALL ZLASET('A',NR,  N1-NR, CZERO, CZERO,  U(1,NR+1),LDU)
 1814:                CALL ZLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU)
 1815:             END IF
 1816:          END IF
 1817: *
 1818:          CALL ZUNMQR( 'Left', 'No Tr', M, N1, N, A, LDA, CWORK, U,
 1819:      $        LDU, CWORK(N+1), LWORK-N, IERR )
 1820: *
 1821:             IF ( ROWPIV )
 1822:      $         CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
 1823: *
 1824: *
 1825:          END IF
 1826:          IF ( TRANSP ) THEN
 1827: *           .. swap U and V because the procedure worked on A^*
 1828:             DO 6974 p = 1, N
 1829:                CALL ZSWAP( N, U(1,p), 1, V(1,p), 1 )
 1830:  6974       CONTINUE
 1831:          END IF
 1832: *
 1833:       END IF
 1834: *     end of the full SVD
 1835: *
 1836: *     Undo scaling, if necessary (and possible)
 1837: *
 1838:       IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN
 1839:          CALL ZLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR )
 1840:          USCAL1 = ONE
 1841:          USCAL2 = ONE
 1842:       END IF
 1843: *
 1844:       IF ( NR .LT. N ) THEN
 1845:          DO 3004 p = NR+1, N
 1846:             SVA(p) = ZERO
 1847:  3004    CONTINUE
 1848:       END IF
 1849: *
 1850:       RWORK(1) = USCAL2 * SCALEM
 1851:       RWORK(2) = USCAL1
 1852:       IF ( ERREST ) RWORK(3) = SCONDA
 1853:       IF ( LSVEC .AND. RSVEC ) THEN
 1854:          RWORK(4) = CONDR1
 1855:          RWORK(5) = CONDR2
 1856:       END IF
 1857:       IF ( L2TRAN ) THEN
 1858:          RWORK(6) = ENTRA
 1859:          RWORK(7) = ENTRAT
 1860:       END IF
 1861: *
 1862:       IWORK(1) = NR
 1863:       IWORK(2) = NUMRANK
 1864:       IWORK(3) = WARNING
 1865: *
 1866:       RETURN
 1867: *     ..
 1868: *     .. END OF ZGEJSV
 1869: *     ..
 1870:       END
 1871: *

CVSweb interface <joel.bertrand@systella.fr>