File:  [local] / rpl / lapack / lapack / dgesdd.f
Revision 1.22: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:38:49 2023 UTC (8 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD
Première mise à jour de lapack et blas.

    1: *> \brief \b DGESDD
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at
    6: *            http://www.netlib.org/lapack/explore-html/
    7: *
    8: *> \htmlonly
    9: *> Download DGESDD + dependencies
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.f">
   11: *> [TGZ]</a>
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.f">
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.f">
   15: *> [TXT]</a>
   16: *> \endhtmlonly
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
   22: *                          WORK, LWORK, IWORK, INFO )
   23: *
   24: *       .. Scalar Arguments ..
   25: *       CHARACTER          JOBZ
   26: *       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
   27: *       ..
   28: *       .. Array Arguments ..
   29: *       INTEGER            IWORK( * )
   30: *       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
   31: *      $                   VT( LDVT, * ), WORK( * )
   32: *       ..
   33: *
   34: *
   35: *> \par Purpose:
   36: *  =============
   37: *>
   38: *> \verbatim
   39: *>
   40: *> DGESDD computes the singular value decomposition (SVD) of a real
   41: *> M-by-N matrix A, optionally computing the left and right singular
   42: *> vectors.  If singular vectors are desired, it uses a
   43: *> divide-and-conquer algorithm.
   44: *>
   45: *> The SVD is written
   46: *>
   47: *>      A = U * SIGMA * transpose(V)
   48: *>
   49: *> where SIGMA is an M-by-N matrix which is zero except for its
   50: *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
   51: *> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
   52: *> are the singular values of A; they are real and non-negative, and
   53: *> are returned in descending order.  The first min(m,n) columns of
   54: *> U and V are the left and right singular vectors of A.
   55: *>
   56: *> Note that the routine returns VT = V**T, not V.
   57: *>
   58: *> The divide and conquer algorithm makes very mild assumptions about
   59: *> floating point arithmetic. It will work on machines with a guard
   60: *> digit in add/subtract, or on those binary machines without guard
   61: *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
   62: *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
   63: *> without guard digits, but we know of none.
   64: *> \endverbatim
   65: *
   66: *  Arguments:
   67: *  ==========
   68: *
   69: *> \param[in] JOBZ
   70: *> \verbatim
   71: *>          JOBZ is CHARACTER*1
   72: *>          Specifies options for computing all or part of the matrix U:
   73: *>          = 'A':  all M columns of U and all N rows of V**T are
   74: *>                  returned in the arrays U and VT;
   75: *>          = 'S':  the first min(M,N) columns of U and the first
   76: *>                  min(M,N) rows of V**T are returned in the arrays U
   77: *>                  and VT;
   78: *>          = 'O':  If M >= N, the first N columns of U are overwritten
   79: *>                  on the array A and all rows of V**T are returned in
   80: *>                  the array VT;
   81: *>                  otherwise, all columns of U are returned in the
   82: *>                  array U and the first M rows of V**T are overwritten
   83: *>                  in the array A;
   84: *>          = 'N':  no columns of U or rows of V**T are computed.
   85: *> \endverbatim
   86: *>
   87: *> \param[in] M
   88: *> \verbatim
   89: *>          M is INTEGER
   90: *>          The number of rows of the input matrix A.  M >= 0.
   91: *> \endverbatim
   92: *>
   93: *> \param[in] N
   94: *> \verbatim
   95: *>          N is INTEGER
   96: *>          The number of columns of the input matrix A.  N >= 0.
   97: *> \endverbatim
   98: *>
   99: *> \param[in,out] A
  100: *> \verbatim
  101: *>          A is DOUBLE PRECISION array, dimension (LDA,N)
  102: *>          On entry, the M-by-N matrix A.
  103: *>          On exit,
  104: *>          if JOBZ = 'O',  A is overwritten with the first N columns
  105: *>                          of U (the left singular vectors, stored
  106: *>                          columnwise) if M >= N;
  107: *>                          A is overwritten with the first M rows
  108: *>                          of V**T (the right singular vectors, stored
  109: *>                          rowwise) otherwise.
  110: *>          if JOBZ .ne. 'O', the contents of A are destroyed.
  111: *> \endverbatim
  112: *>
  113: *> \param[in] LDA
  114: *> \verbatim
  115: *>          LDA is INTEGER
  116: *>          The leading dimension of the array A.  LDA >= max(1,M).
  117: *> \endverbatim
  118: *>
  119: *> \param[out] S
  120: *> \verbatim
  121: *>          S is DOUBLE PRECISION array, dimension (min(M,N))
  122: *>          The singular values of A, sorted so that S(i) >= S(i+1).
  123: *> \endverbatim
  124: *>
  125: *> \param[out] U
  126: *> \verbatim
  127: *>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
  128: *>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
  129: *>          UCOL = min(M,N) if JOBZ = 'S'.
  130: *>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
  131: *>          orthogonal matrix U;
  132: *>          if JOBZ = 'S', U contains the first min(M,N) columns of U
  133: *>          (the left singular vectors, stored columnwise);
  134: *>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
  135: *> \endverbatim
  136: *>
  137: *> \param[in] LDU
  138: *> \verbatim
  139: *>          LDU is INTEGER
  140: *>          The leading dimension of the array U.  LDU >= 1; if
  141: *>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
  142: *> \endverbatim
  143: *>
  144: *> \param[out] VT
  145: *> \verbatim
  146: *>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
  147: *>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
  148: *>          N-by-N orthogonal matrix V**T;
  149: *>          if JOBZ = 'S', VT contains the first min(M,N) rows of
  150: *>          V**T (the right singular vectors, stored rowwise);
  151: *>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
  152: *> \endverbatim
  153: *>
  154: *> \param[in] LDVT
  155: *> \verbatim
  156: *>          LDVT is INTEGER
  157: *>          The leading dimension of the array VT.  LDVT >= 1;
  158: *>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
  159: *>          if JOBZ = 'S', LDVT >= min(M,N).
  160: *> \endverbatim
  161: *>
  162: *> \param[out] WORK
  163: *> \verbatim
  164: *>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  165: *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
  166: *> \endverbatim
  167: *>
  168: *> \param[in] LWORK
  169: *> \verbatim
  170: *>          LWORK is INTEGER
  171: *>          The dimension of the array WORK. LWORK >= 1.
  172: *>          If LWORK = -1, a workspace query is assumed.  The optimal
  173: *>          size for the WORK array is calculated and stored in WORK(1),
  174: *>          and no other work except argument checking is performed.
  175: *>
  176: *>          Let mx = max(M,N) and mn = min(M,N).
  177: *>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
  178: *>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
  179: *>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
  180: *>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
  181: *>          These are not tight minimums in all cases; see comments inside code.
  182: *>          For good performance, LWORK should generally be larger;
  183: *>          a query is recommended.
  184: *> \endverbatim
  185: *>
  186: *> \param[out] IWORK
  187: *> \verbatim
  188: *>          IWORK is INTEGER array, dimension (8*min(M,N))
  189: *> \endverbatim
  190: *>
  191: *> \param[out] INFO
  192: *> \verbatim
  193: *>          INFO is INTEGER
  194: *>          <  0:  if INFO = -i, the i-th argument had an illegal value.
  195: *>          = -4:  if A had a NAN entry.
  196: *>          >  0:  DBDSDC did not converge, updating process failed.
  197: *>          =  0:  successful exit.
  198: *> \endverbatim
  199: *
  200: *  Authors:
  201: *  ========
  202: *
  203: *> \author Univ. of Tennessee
  204: *> \author Univ. of California Berkeley
  205: *> \author Univ. of Colorado Denver
  206: *> \author NAG Ltd.
  207: *
  208: *> \ingroup doubleGEsing
  209: *
  210: *> \par Contributors:
  211: *  ==================
  212: *>
  213: *>     Ming Gu and Huan Ren, Computer Science Division, University of
  214: *>     California at Berkeley, USA
  215: *>
  216: *  =====================================================================
  217:       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
  218:      $                   WORK, LWORK, IWORK, INFO )
  219:       implicit none
  220: *
  221: *  -- LAPACK driver routine --
  222: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  223: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  224: *
  225: *     .. Scalar Arguments ..
  226:       CHARACTER          JOBZ
  227:       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
  228: *     ..
  229: *     .. Array Arguments ..
  230:       INTEGER            IWORK( * )
  231:       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
  232:      $                   VT( LDVT, * ), WORK( * )
  233: *     ..
  234: *
  235: *  =====================================================================
  236: *
  237: *     .. Parameters ..
  238:       DOUBLE PRECISION   ZERO, ONE
  239:       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
  240: *     ..
  241: *     .. Local Scalars ..
  242:       LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
  243:       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
  244:      $                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
  245:      $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
  246:      $                   MNTHR, NWORK, WRKBL
  247:       INTEGER            LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,
  248:      $                   LWORK_DGEBRD_NN, LWORK_DGELQF_MN,
  249:      $                   LWORK_DGEQRF_MN,
  250:      $                   LWORK_DORGBR_P_MM, LWORK_DORGBR_Q_NN,
  251:      $                   LWORK_DORGLQ_MN, LWORK_DORGLQ_NN,
  252:      $                   LWORK_DORGQR_MM, LWORK_DORGQR_MN,
  253:      $                   LWORK_DORMBR_PRT_MM, LWORK_DORMBR_QLN_MM,
  254:      $                   LWORK_DORMBR_PRT_MN, LWORK_DORMBR_QLN_MN,
  255:      $                   LWORK_DORMBR_PRT_NN, LWORK_DORMBR_QLN_NN
  256:       DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
  257: *     ..
  258: *     .. Local Arrays ..
  259:       INTEGER            IDUM( 1 )
  260:       DOUBLE PRECISION   DUM( 1 )
  261: *     ..
  262: *     .. External Subroutines ..
  263:       EXTERNAL           DBDSDC, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
  264:      $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
  265:      $                   XERBLA
  266: *     ..
  267: *     .. External Functions ..
  268:       LOGICAL            LSAME, DISNAN
  269:       DOUBLE PRECISION   DLAMCH, DLANGE, DROUNDUP_LWORK
  270:       EXTERNAL           DLAMCH, DLANGE, LSAME, DISNAN, 
  271:      $                   DROUNDUP_LWORK
  272: *     ..
  273: *     .. Intrinsic Functions ..
  274:       INTRINSIC          INT, MAX, MIN, SQRT
  275: *     ..
  276: *     .. Executable Statements ..
  277: *
  278: *     Test the input arguments
  279: *
  280:       INFO   = 0
  281:       MINMN  = MIN( M, N )
  282:       WNTQA  = LSAME( JOBZ, 'A' )
  283:       WNTQS  = LSAME( JOBZ, 'S' )
  284:       WNTQAS = WNTQA .OR. WNTQS
  285:       WNTQO  = LSAME( JOBZ, 'O' )
  286:       WNTQN  = LSAME( JOBZ, 'N' )
  287:       LQUERY = ( LWORK.EQ.-1 )
  288: *
  289:       IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
  290:          INFO = -1
  291:       ELSE IF( M.LT.0 ) THEN
  292:          INFO = -2
  293:       ELSE IF( N.LT.0 ) THEN
  294:          INFO = -3
  295:       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  296:          INFO = -5
  297:       ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
  298:      $         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
  299:          INFO = -8
  300:       ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
  301:      $         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
  302:      $         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
  303:          INFO = -10
  304:       END IF
  305: *
  306: *     Compute workspace
  307: *       Note: Comments in the code beginning "Workspace:" describe the
  308: *       minimal amount of workspace allocated at that point in the code,
  309: *       as well as the preferred amount for good performance.
  310: *       NB refers to the optimal block size for the immediately
  311: *       following subroutine, as returned by ILAENV.
  312: *
  313:       IF( INFO.EQ.0 ) THEN
  314:          MINWRK = 1
  315:          MAXWRK = 1
  316:          BDSPAC = 0
  317:          MNTHR  = INT( MINMN*11.0D0 / 6.0D0 )
  318:          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
  319: *
  320: *           Compute space needed for DBDSDC
  321: *
  322:             IF( WNTQN ) THEN
  323: *              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
  324: *              keep 7*N for backwards compatibility.
  325:                BDSPAC = 7*N
  326:             ELSE
  327:                BDSPAC = 3*N*N + 4*N
  328:             END IF
  329: *
  330: *           Compute space preferred for each routine
  331:             CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
  332:      $                   DUM(1), DUM(1), -1, IERR )
  333:             LWORK_DGEBRD_MN = INT( DUM(1) )
  334: *
  335:             CALL DGEBRD( N, N, DUM(1), N, DUM(1), DUM(1), DUM(1),
  336:      $                   DUM(1), DUM(1), -1, IERR )
  337:             LWORK_DGEBRD_NN = INT( DUM(1) )
  338: *
  339:             CALL DGEQRF( M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  340:             LWORK_DGEQRF_MN = INT( DUM(1) )
  341: *
  342:             CALL DORGBR( 'Q', N, N, N, DUM(1), N, DUM(1), DUM(1), -1,
  343:      $                   IERR )
  344:             LWORK_DORGBR_Q_NN = INT( DUM(1) )
  345: *
  346:             CALL DORGQR( M, M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  347:             LWORK_DORGQR_MM = INT( DUM(1) )
  348: *
  349:             CALL DORGQR( M, N, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  350:             LWORK_DORGQR_MN = INT( DUM(1) )
  351: *
  352:             CALL DORMBR( 'P', 'R', 'T', N, N, N, DUM(1), N,
  353:      $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
  354:             LWORK_DORMBR_PRT_NN = INT( DUM(1) )
  355: *
  356:             CALL DORMBR( 'Q', 'L', 'N', N, N, N, DUM(1), N,
  357:      $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
  358:             LWORK_DORMBR_QLN_NN = INT( DUM(1) )
  359: *
  360:             CALL DORMBR( 'Q', 'L', 'N', M, N, N, DUM(1), M,
  361:      $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
  362:             LWORK_DORMBR_QLN_MN = INT( DUM(1) )
  363: *
  364:             CALL DORMBR( 'Q', 'L', 'N', M, M, N, DUM(1), M,
  365:      $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
  366:             LWORK_DORMBR_QLN_MM = INT( DUM(1) )
  367: *
  368:             IF( M.GE.MNTHR ) THEN
  369:                IF( WNTQN ) THEN
  370: *
  371: *                 Path 1 (M >> N, JOBZ='N')
  372: *
  373:                   WRKBL = N + LWORK_DGEQRF_MN
  374:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
  375:                   MAXWRK = MAX( WRKBL, BDSPAC + N )
  376:                   MINWRK = BDSPAC + N
  377:                ELSE IF( WNTQO ) THEN
  378: *
  379: *                 Path 2 (M >> N, JOBZ='O')
  380: *
  381:                   WRKBL = N + LWORK_DGEQRF_MN
  382:                   WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MN )
  383:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
  384:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
  385:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  386:                   WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  387:                   MAXWRK = WRKBL + 2*N*N
  388:                   MINWRK = BDSPAC + 2*N*N + 3*N
  389:                ELSE IF( WNTQS ) THEN
  390: *
  391: *                 Path 3 (M >> N, JOBZ='S')
  392: *
  393:                   WRKBL = N + LWORK_DGEQRF_MN
  394:                   WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MN )
  395:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
  396:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
  397:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  398:                   WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  399:                   MAXWRK = WRKBL + N*N
  400:                   MINWRK = BDSPAC + N*N + 3*N
  401:                ELSE IF( WNTQA ) THEN
  402: *
  403: *                 Path 4 (M >> N, JOBZ='A')
  404: *
  405:                   WRKBL = N + LWORK_DGEQRF_MN
  406:                   WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MM )
  407:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
  408:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
  409:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  410:                   WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  411:                   MAXWRK = WRKBL + N*N
  412:                   MINWRK = N*N + MAX( 3*N + BDSPAC, N + M )
  413:                END IF
  414:             ELSE
  415: *
  416: *              Path 5 (M >= N, but not much larger)
  417: *
  418:                WRKBL = 3*N + LWORK_DGEBRD_MN
  419:                IF( WNTQN ) THEN
  420: *                 Path 5n (M >= N, jobz='N')
  421:                   MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  422:                   MINWRK = 3*N + MAX( M, BDSPAC )
  423:                ELSE IF( WNTQO ) THEN
  424: *                 Path 5o (M >= N, jobz='O')
  425:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  426:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN )
  427:                   WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  428:                   MAXWRK = WRKBL + M*N
  429:                   MINWRK = 3*N + MAX( M, N*N + BDSPAC )
  430:                ELSE IF( WNTQS ) THEN
  431: *                 Path 5s (M >= N, jobz='S')
  432:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN )
  433:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  434:                   MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  435:                   MINWRK = 3*N + MAX( M, BDSPAC )
  436:                ELSE IF( WNTQA ) THEN
  437: *                 Path 5a (M >= N, jobz='A')
  438:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MM )
  439:                   WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
  440:                   MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  441:                   MINWRK = 3*N + MAX( M, BDSPAC )
  442:                END IF
  443:             END IF
  444:          ELSE IF( MINMN.GT.0 ) THEN
  445: *
  446: *           Compute space needed for DBDSDC
  447: *
  448:             IF( WNTQN ) THEN
  449: *              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
  450: *              keep 7*N for backwards compatibility.
  451:                BDSPAC = 7*M
  452:             ELSE
  453:                BDSPAC = 3*M*M + 4*M
  454:             END IF
  455: *
  456: *           Compute space preferred for each routine
  457:             CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
  458:      $                   DUM(1), DUM(1), -1, IERR )
  459:             LWORK_DGEBRD_MN = INT( DUM(1) )
  460: *
  461:             CALL DGEBRD( M, M, A, M, S, DUM(1), DUM(1),
  462:      $                   DUM(1), DUM(1), -1, IERR )
  463:             LWORK_DGEBRD_MM = INT( DUM(1) )
  464: *
  465:             CALL DGELQF( M, N, A, M, DUM(1), DUM(1), -1, IERR )
  466:             LWORK_DGELQF_MN = INT( DUM(1) )
  467: *
  468:             CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
  469:             LWORK_DORGLQ_NN = INT( DUM(1) )
  470: *
  471:             CALL DORGLQ( M, N, M, A, M, DUM(1), DUM(1), -1, IERR )
  472:             LWORK_DORGLQ_MN = INT( DUM(1) )
  473: *
  474:             CALL DORGBR( 'P', M, M, M, A, N, DUM(1), DUM(1), -1, IERR )
  475:             LWORK_DORGBR_P_MM = INT( DUM(1) )
  476: *
  477:             CALL DORMBR( 'P', 'R', 'T', M, M, M, DUM(1), M,
  478:      $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
  479:             LWORK_DORMBR_PRT_MM = INT( DUM(1) )
  480: *
  481:             CALL DORMBR( 'P', 'R', 'T', M, N, M, DUM(1), M,
  482:      $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
  483:             LWORK_DORMBR_PRT_MN = INT( DUM(1) )
  484: *
  485:             CALL DORMBR( 'P', 'R', 'T', N, N, M, DUM(1), N,
  486:      $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
  487:             LWORK_DORMBR_PRT_NN = INT( DUM(1) )
  488: *
  489:             CALL DORMBR( 'Q', 'L', 'N', M, M, M, DUM(1), M,
  490:      $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
  491:             LWORK_DORMBR_QLN_MM = INT( DUM(1) )
  492: *
  493:             IF( N.GE.MNTHR ) THEN
  494:                IF( WNTQN ) THEN
  495: *
  496: *                 Path 1t (N >> M, JOBZ='N')
  497: *
  498:                   WRKBL = M + LWORK_DGELQF_MN
  499:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
  500:                   MAXWRK = MAX( WRKBL, BDSPAC + M )
  501:                   MINWRK = BDSPAC + M
  502:                ELSE IF( WNTQO ) THEN
  503: *
  504: *                 Path 2t (N >> M, JOBZ='O')
  505: *
  506:                   WRKBL = M + LWORK_DGELQF_MN
  507:                   WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_MN )
  508:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
  509:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  510:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
  511:                   WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  512:                   MAXWRK = WRKBL + 2*M*M
  513:                   MINWRK = BDSPAC + 2*M*M + 3*M
  514:                ELSE IF( WNTQS ) THEN
  515: *
  516: *                 Path 3t (N >> M, JOBZ='S')
  517: *
  518:                   WRKBL = M + LWORK_DGELQF_MN
  519:                   WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_MN )
  520:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
  521:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  522:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
  523:                   WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  524:                   MAXWRK = WRKBL + M*M
  525:                   MINWRK = BDSPAC + M*M + 3*M
  526:                ELSE IF( WNTQA ) THEN
  527: *
  528: *                 Path 4t (N >> M, JOBZ='A')
  529: *
  530:                   WRKBL = M + LWORK_DGELQF_MN
  531:                   WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_NN )
  532:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
  533:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  534:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
  535:                   WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  536:                   MAXWRK = WRKBL + M*M
  537:                   MINWRK = M*M + MAX( 3*M + BDSPAC, M + N )
  538:                END IF
  539:             ELSE
  540: *
  541: *              Path 5t (N > M, but not much larger)
  542: *
  543:                WRKBL = 3*M + LWORK_DGEBRD_MN
  544:                IF( WNTQN ) THEN
  545: *                 Path 5tn (N > M, jobz='N')
  546:                   MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  547:                   MINWRK = 3*M + MAX( N, BDSPAC )
  548:                ELSE IF( WNTQO ) THEN
  549: *                 Path 5to (N > M, jobz='O')
  550:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  551:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN )
  552:                   WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  553:                   MAXWRK = WRKBL + M*N
  554:                   MINWRK = 3*M + MAX( N, M*M + BDSPAC )
  555:                ELSE IF( WNTQS ) THEN
  556: *                 Path 5ts (N > M, jobz='S')
  557:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  558:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN )
  559:                   MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  560:                   MINWRK = 3*M + MAX( N, BDSPAC )
  561:                ELSE IF( WNTQA ) THEN
  562: *                 Path 5ta (N > M, jobz='A')
  563:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
  564:                   WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_NN )
  565:                   MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  566:                   MINWRK = 3*M + MAX( N, BDSPAC )
  567:                END IF
  568:             END IF
  569:          END IF
  570: 
  571:          MAXWRK = MAX( MAXWRK, MINWRK )
  572:          WORK( 1 ) = DROUNDUP_LWORK( MAXWRK )
  573: *
  574:          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
  575:             INFO = -12
  576:          END IF
  577:       END IF
  578: *
  579:       IF( INFO.NE.0 ) THEN
  580:          CALL XERBLA( 'DGESDD', -INFO )
  581:          RETURN
  582:       ELSE IF( LQUERY ) THEN
  583:          RETURN
  584:       END IF
  585: *
  586: *     Quick return if possible
  587: *
  588:       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
  589:          RETURN
  590:       END IF
  591: *
  592: *     Get machine constants
  593: *
  594:       EPS = DLAMCH( 'P' )
  595:       SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
  596:       BIGNUM = ONE / SMLNUM
  597: *
  598: *     Scale A if max element outside range [SMLNUM,BIGNUM]
  599: *
  600:       ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
  601:       IF( DISNAN( ANRM ) ) THEN
  602:           INFO = -4
  603:           RETURN
  604:       END IF
  605:       ISCL = 0
  606:       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
  607:          ISCL = 1
  608:          CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
  609:       ELSE IF( ANRM.GT.BIGNUM ) THEN
  610:          ISCL = 1
  611:          CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
  612:       END IF
  613: *
  614:       IF( M.GE.N ) THEN
  615: *
  616: *        A has at least as many rows as columns. If A has sufficiently
  617: *        more rows than columns, first reduce using the QR
  618: *        decomposition (if sufficient workspace available)
  619: *
  620:          IF( M.GE.MNTHR ) THEN
  621: *
  622:             IF( WNTQN ) THEN
  623: *
  624: *              Path 1 (M >> N, JOBZ='N')
  625: *              No singular vectors to be computed
  626: *
  627:                ITAU = 1
  628:                NWORK = ITAU + N
  629: *
  630: *              Compute A=Q*R
  631: *              Workspace: need   N [tau] + N    [work]
  632: *              Workspace: prefer N [tau] + N*NB [work]
  633: *
  634:                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  635:      $                      LWORK - NWORK + 1, IERR )
  636: *
  637: *              Zero out below R
  638: *
  639:                CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
  640:                IE = 1
  641:                ITAUQ = IE + N
  642:                ITAUP = ITAUQ + N
  643:                NWORK = ITAUP + N
  644: *
  645: *              Bidiagonalize R in A
  646: *              Workspace: need   3*N [e, tauq, taup] + N      [work]
  647: *              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
  648: *
  649:                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  650:      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  651:      $                      IERR )
  652:                NWORK = IE + N
  653: *
  654: *              Perform bidiagonal SVD, computing singular values only
  655: *              Workspace: need   N [e] + BDSPAC
  656: *
  657:                CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
  658:      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  659: *
  660:             ELSE IF( WNTQO ) THEN
  661: *
  662: *              Path 2 (M >> N, JOBZ = 'O')
  663: *              N left singular vectors to be overwritten on A and
  664: *              N right singular vectors to be computed in VT
  665: *
  666:                IR = 1
  667: *
  668: *              WORK(IR) is LDWRKR by N
  669: *
  670:                IF( LWORK .GE. LDA*N + N*N + 3*N + BDSPAC ) THEN
  671:                   LDWRKR = LDA
  672:                ELSE
  673:                   LDWRKR = ( LWORK - N*N - 3*N - BDSPAC ) / N
  674:                END IF
  675:                ITAU = IR + LDWRKR*N
  676:                NWORK = ITAU + N
  677: *
  678: *              Compute A=Q*R
  679: *              Workspace: need   N*N [R] + N [tau] + N    [work]
  680: *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  681: *
  682:                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  683:      $                      LWORK - NWORK + 1, IERR )
  684: *
  685: *              Copy R to WORK(IR), zeroing out below it
  686: *
  687:                CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
  688:                CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
  689:      $                      LDWRKR )
  690: *
  691: *              Generate Q in A
  692: *              Workspace: need   N*N [R] + N [tau] + N    [work]
  693: *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  694: *
  695:                CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  696:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
  697:                IE = ITAU
  698:                ITAUQ = IE + N
  699:                ITAUP = ITAUQ + N
  700:                NWORK = ITAUP + N
  701: *
  702: *              Bidiagonalize R in WORK(IR)
  703: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
  704: *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
  705: *
  706:                CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
  707:      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  708:      $                      LWORK - NWORK + 1, IERR )
  709: *
  710: *              WORK(IU) is N by N
  711: *
  712:                IU = NWORK
  713:                NWORK = IU + N*N
  714: *
  715: *              Perform bidiagonal SVD, computing left singular vectors
  716: *              of bidiagonal matrix in WORK(IU) and computing right
  717: *              singular vectors of bidiagonal matrix in VT
  718: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
  719: *
  720:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
  721:      $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  722:      $                      INFO )
  723: *
  724: *              Overwrite WORK(IU) by left singular vectors of R
  725: *              and VT by right singular vectors of R
  726: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work]
  727: *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  728: *
  729:                CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
  730:      $                      WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
  731:      $                      LWORK - NWORK + 1, IERR )
  732:                CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
  733:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  734:      $                      LWORK - NWORK + 1, IERR )
  735: *
  736: *              Multiply Q in A by left singular vectors of R in
  737: *              WORK(IU), storing result in WORK(IR) and copying to A
  738: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U]
  739: *              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
  740: *
  741:                DO 10 I = 1, M, LDWRKR
  742:                   CHUNK = MIN( M - I + 1, LDWRKR )
  743:                   CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
  744:      $                        LDA, WORK( IU ), N, ZERO, WORK( IR ),
  745:      $                        LDWRKR )
  746:                   CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
  747:      $                         A( I, 1 ), LDA )
  748:    10          CONTINUE
  749: *
  750:             ELSE IF( WNTQS ) THEN
  751: *
  752: *              Path 3 (M >> N, JOBZ='S')
  753: *              N left singular vectors to be computed in U and
  754: *              N right singular vectors to be computed in VT
  755: *
  756:                IR = 1
  757: *
  758: *              WORK(IR) is N by N
  759: *
  760:                LDWRKR = N
  761:                ITAU = IR + LDWRKR*N
  762:                NWORK = ITAU + N
  763: *
  764: *              Compute A=Q*R
  765: *              Workspace: need   N*N [R] + N [tau] + N    [work]
  766: *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  767: *
  768:                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  769:      $                      LWORK - NWORK + 1, IERR )
  770: *
  771: *              Copy R to WORK(IR), zeroing out below it
  772: *
  773:                CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
  774:                CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
  775:      $                      LDWRKR )
  776: *
  777: *              Generate Q in A
  778: *              Workspace: need   N*N [R] + N [tau] + N    [work]
  779: *              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  780: *
  781:                CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  782:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
  783:                IE = ITAU
  784:                ITAUQ = IE + N
  785:                ITAUP = ITAUQ + N
  786:                NWORK = ITAUP + N
  787: *
  788: *              Bidiagonalize R in WORK(IR)
  789: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
  790: *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
  791: *
  792:                CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
  793:      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  794:      $                      LWORK - NWORK + 1, IERR )
  795: *
  796: *              Perform bidiagonal SVD, computing left singular vectors
  797: *              of bidiagoal matrix in U and computing right singular
  798: *              vectors of bidiagonal matrix in VT
  799: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC
  800: *
  801:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
  802:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  803:      $                      INFO )
  804: *
  805: *              Overwrite U by left singular vectors of R and VT
  806: *              by right singular vectors of R
  807: *              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work]
  808: *              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
  809: *
  810:                CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
  811:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  812:      $                      LWORK - NWORK + 1, IERR )
  813: *
  814:                CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
  815:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  816:      $                      LWORK - NWORK + 1, IERR )
  817: *
  818: *              Multiply Q in A by left singular vectors of R in
  819: *              WORK(IR), storing result in U
  820: *              Workspace: need   N*N [R]
  821: *
  822:                CALL DLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
  823:                CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
  824:      $                     LDWRKR, ZERO, U, LDU )
  825: *
  826:             ELSE IF( WNTQA ) THEN
  827: *
  828: *              Path 4 (M >> N, JOBZ='A')
  829: *              M left singular vectors to be computed in U and
  830: *              N right singular vectors to be computed in VT
  831: *
  832:                IU = 1
  833: *
  834: *              WORK(IU) is N by N
  835: *
  836:                LDWRKU = N
  837:                ITAU = IU + LDWRKU*N
  838:                NWORK = ITAU + N
  839: *
  840: *              Compute A=Q*R, copying result to U
  841: *              Workspace: need   N*N [U] + N [tau] + N    [work]
  842: *              Workspace: prefer N*N [U] + N [tau] + N*NB [work]
  843: *
  844:                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  845:      $                      LWORK - NWORK + 1, IERR )
  846:                CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  847: *
  848: *              Generate Q in U
  849: *              Workspace: need   N*N [U] + N [tau] + M    [work]
  850: *              Workspace: prefer N*N [U] + N [tau] + M*NB [work]
  851:                CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  852:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
  853: *
  854: *              Produce R in A, zeroing out other entries
  855: *
  856:                CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
  857:                IE = ITAU
  858:                ITAUQ = IE + N
  859:                ITAUP = ITAUQ + N
  860:                NWORK = ITAUP + N
  861: *
  862: *              Bidiagonalize R in A
  863: *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work]
  864: *              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
  865: *
  866:                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  867:      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  868:      $                      IERR )
  869: *
  870: *              Perform bidiagonal SVD, computing left singular vectors
  871: *              of bidiagonal matrix in WORK(IU) and computing right
  872: *              singular vectors of bidiagonal matrix in VT
  873: *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC
  874: *
  875:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
  876:      $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  877:      $                      INFO )
  878: *
  879: *              Overwrite WORK(IU) by left singular vectors of R and VT
  880: *              by right singular vectors of R
  881: *              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work]
  882: *              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
  883: *
  884:                CALL DORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
  885:      $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
  886:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
  887:                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
  888:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  889:      $                      LWORK - NWORK + 1, IERR )
  890: *
  891: *              Multiply Q in U by left singular vectors of R in
  892: *              WORK(IU), storing result in A
  893: *              Workspace: need   N*N [U]
  894: *
  895:                CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
  896:      $                     LDWRKU, ZERO, A, LDA )
  897: *
  898: *              Copy left singular vectors of A from A to U
  899: *
  900:                CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
  901: *
  902:             END IF
  903: *
  904:          ELSE
  905: *
  906: *           M .LT. MNTHR
  907: *
  908: *           Path 5 (M >= N, but not much larger)
  909: *           Reduce to bidiagonal form without QR decomposition
  910: *
  911:             IE = 1
  912:             ITAUQ = IE + N
  913:             ITAUP = ITAUQ + N
  914:             NWORK = ITAUP + N
  915: *
  916: *           Bidiagonalize A
  917: *           Workspace: need   3*N [e, tauq, taup] + M        [work]
  918: *           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
  919: *
  920:             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  921:      $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  922:      $                   IERR )
  923:             IF( WNTQN ) THEN
  924: *
  925: *              Path 5n (M >= N, JOBZ='N')
  926: *              Perform bidiagonal SVD, only computing singular values
  927: *              Workspace: need   3*N [e, tauq, taup] + BDSPAC
  928: *
  929:                CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
  930:      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  931:             ELSE IF( WNTQO ) THEN
  932: *              Path 5o (M >= N, JOBZ='O')
  933:                IU = NWORK
  934:                IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
  935: *
  936: *                 WORK( IU ) is M by N
  937: *
  938:                   LDWRKU = M
  939:                   NWORK = IU + LDWRKU*N
  940:                   CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
  941:      $                         LDWRKU )
  942: *                 IR is unused; silence compile warnings
  943:                   IR = -1
  944:                ELSE
  945: *
  946: *                 WORK( IU ) is N by N
  947: *
  948:                   LDWRKU = N
  949:                   NWORK = IU + LDWRKU*N
  950: *
  951: *                 WORK(IR) is LDWRKR by N
  952: *
  953:                   IR = NWORK
  954:                   LDWRKR = ( LWORK - N*N - 3*N ) / N
  955:                END IF
  956:                NWORK = IU + LDWRKU*N
  957: *
  958: *              Perform bidiagonal SVD, computing left singular vectors
  959: *              of bidiagonal matrix in WORK(IU) and computing right
  960: *              singular vectors of bidiagonal matrix in VT
  961: *              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC
  962: *
  963:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
  964:      $                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
  965:      $                      IWORK, INFO )
  966: *
  967: *              Overwrite VT by right singular vectors of A
  968: *              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
  969: *              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  970: *
  971:                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
  972:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  973:      $                      LWORK - NWORK + 1, IERR )
  974: *
  975:                IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
  976: *
  977: *                 Path 5o-fast
  978: *                 Overwrite WORK(IU) by left singular vectors of A
  979: *                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work]
  980: *                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
  981: *
  982:                   CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
  983:      $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
  984:      $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
  985: *
  986: *                 Copy left singular vectors of A from WORK(IU) to A
  987: *
  988:                   CALL DLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
  989:                ELSE
  990: *
  991: *                 Path 5o-slow
  992: *                 Generate Q in A
  993: *                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
  994: *                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  995: *
  996:                   CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
  997:      $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
  998: *
  999: *                 Multiply Q in A by left singular vectors of
 1000: *                 bidiagonal matrix in WORK(IU), storing result in
 1001: *                 WORK(IR) and copying to A
 1002: *                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R]
 1003: *                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R]
 1004: *
 1005:                   DO 20 I = 1, M, LDWRKR
 1006:                      CHUNK = MIN( M - I + 1, LDWRKR )
 1007:                      CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
 1008:      $                           LDA, WORK( IU ), LDWRKU, ZERO,
 1009:      $                           WORK( IR ), LDWRKR )
 1010:                      CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
 1011:      $                            A( I, 1 ), LDA )
 1012:    20             CONTINUE
 1013:                END IF
 1014: *
 1015:             ELSE IF( WNTQS ) THEN
 1016: *
 1017: *              Path 5s (M >= N, JOBZ='S')
 1018: *              Perform bidiagonal SVD, computing left singular vectors
 1019: *              of bidiagonal matrix in U and computing right singular
 1020: *              vectors of bidiagonal matrix in VT
 1021: *              Workspace: need   3*N [e, tauq, taup] + BDSPAC
 1022: *
 1023:                CALL DLASET( 'F', M, N, ZERO, ZERO, U, LDU )
 1024:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
 1025:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
 1026:      $                      INFO )
 1027: *
 1028: *              Overwrite U by left singular vectors of A and VT
 1029: *              by right singular vectors of A
 1030: *              Workspace: need   3*N [e, tauq, taup] + N    [work]
 1031: *              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
 1032: *
 1033:                CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
 1034:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1035:      $                      LWORK - NWORK + 1, IERR )
 1036:                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
 1037:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
 1038:      $                      LWORK - NWORK + 1, IERR )
 1039:             ELSE IF( WNTQA ) THEN
 1040: *
 1041: *              Path 5a (M >= N, JOBZ='A')
 1042: *              Perform bidiagonal SVD, computing left singular vectors
 1043: *              of bidiagonal matrix in U and computing right singular
 1044: *              vectors of bidiagonal matrix in VT
 1045: *              Workspace: need   3*N [e, tauq, taup] + BDSPAC
 1046: *
 1047:                CALL DLASET( 'F', M, M, ZERO, ZERO, U, LDU )
 1048:                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
 1049:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
 1050:      $                      INFO )
 1051: *
 1052: *              Set the right corner of U to identity matrix
 1053: *
 1054:                IF( M.GT.N ) THEN
 1055:                   CALL DLASET( 'F', M - N, M - N, ZERO, ONE, U(N+1,N+1),
 1056:      $                         LDU )
 1057:                END IF
 1058: *
 1059: *              Overwrite U by left singular vectors of A and VT
 1060: *              by right singular vectors of A
 1061: *              Workspace: need   3*N [e, tauq, taup] + M    [work]
 1062: *              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
 1063: *
 1064:                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
 1065:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1066:      $                      LWORK - NWORK + 1, IERR )
 1067:                CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
 1068:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
 1069:      $                      LWORK - NWORK + 1, IERR )
 1070:             END IF
 1071: *
 1072:          END IF
 1073: *
 1074:       ELSE
 1075: *
 1076: *        A has more columns than rows. If A has sufficiently more
 1077: *        columns than rows, first reduce using the LQ decomposition (if
 1078: *        sufficient workspace available)
 1079: *
 1080:          IF( N.GE.MNTHR ) THEN
 1081: *
 1082:             IF( WNTQN ) THEN
 1083: *
 1084: *              Path 1t (N >> M, JOBZ='N')
 1085: *              No singular vectors to be computed
 1086: *
 1087:                ITAU = 1
 1088:                NWORK = ITAU + M
 1089: *
 1090: *              Compute A=L*Q
 1091: *              Workspace: need   M [tau] + M [work]
 1092: *              Workspace: prefer M [tau] + M*NB [work]
 1093: *
 1094:                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
 1095:      $                      LWORK - NWORK + 1, IERR )
 1096: *
 1097: *              Zero out above L
 1098: *
 1099:                CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
 1100:                IE = 1
 1101:                ITAUQ = IE + M
 1102:                ITAUP = ITAUQ + M
 1103:                NWORK = ITAUP + M
 1104: *
 1105: *              Bidiagonalize L in A
 1106: *              Workspace: need   3*M [e, tauq, taup] + M      [work]
 1107: *              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
 1108: *
 1109:                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
 1110:      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
 1111:      $                      IERR )
 1112:                NWORK = IE + M
 1113: *
 1114: *              Perform bidiagonal SVD, computing singular values only
 1115: *              Workspace: need   M [e] + BDSPAC
 1116: *
 1117:                CALL DBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
 1118:      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
 1119: *
 1120:             ELSE IF( WNTQO ) THEN
 1121: *
 1122: *              Path 2t (N >> M, JOBZ='O')
 1123: *              M right singular vectors to be overwritten on A and
 1124: *              M left singular vectors to be computed in U
 1125: *
 1126:                IVT = 1
 1127: *
 1128: *              WORK(IVT) is M by M
 1129: *              WORK(IL)  is M by M; it is later resized to M by chunk for gemm
 1130: *
 1131:                IL = IVT + M*M
 1132:                IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN
 1133:                   LDWRKL = M
 1134:                   CHUNK = N
 1135:                ELSE
 1136:                   LDWRKL = M
 1137:                   CHUNK = ( LWORK - M*M ) / M
 1138:                END IF
 1139:                ITAU = IL + LDWRKL*M
 1140:                NWORK = ITAU + M
 1141: *
 1142: *              Compute A=L*Q
 1143: *              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
 1144: *              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
 1145: *
 1146:                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
 1147:      $                      LWORK - NWORK + 1, IERR )
 1148: *
 1149: *              Copy L to WORK(IL), zeroing about above it
 1150: *
 1151:                CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
 1152:                CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO,
 1153:      $                      WORK( IL + LDWRKL ), LDWRKL )
 1154: *
 1155: *              Generate Q in A
 1156: *              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
 1157: *              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
 1158: *
 1159:                CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
 1160:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1161:                IE = ITAU
 1162:                ITAUQ = IE + M
 1163:                ITAUP = ITAUQ + M
 1164:                NWORK = ITAUP + M
 1165: *
 1166: *              Bidiagonalize L in WORK(IL)
 1167: *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work]
 1168: *              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
 1169: *
 1170:                CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
 1171:      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
 1172:      $                      LWORK - NWORK + 1, IERR )
 1173: *
 1174: *              Perform bidiagonal SVD, computing left singular vectors
 1175: *              of bidiagonal matrix in U, and computing right singular
 1176: *              vectors of bidiagonal matrix in WORK(IVT)
 1177: *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
 1178: *
 1179:                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
 1180:      $                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
 1181:      $                      IWORK, INFO )
 1182: *
 1183: *              Overwrite U by left singular vectors of L and WORK(IVT)
 1184: *              by right singular vectors of L
 1185: *              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work]
 1186: *              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
 1187: *
 1188:                CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
 1189:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1190:      $                      LWORK - NWORK + 1, IERR )
 1191:                CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
 1192:      $                      WORK( ITAUP ), WORK( IVT ), M,
 1193:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1194: *
 1195: *              Multiply right singular vectors of L in WORK(IVT) by Q
 1196: *              in A, storing result in WORK(IL) and copying to A
 1197: *              Workspace: need   M*M [VT] + M*M [L]
 1198: *              Workspace: prefer M*M [VT] + M*N [L]
 1199: *              At this point, L is resized as M by chunk.
 1200: *
 1201:                DO 30 I = 1, N, CHUNK
 1202:                   BLK = MIN( N - I + 1, CHUNK )
 1203:                   CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
 1204:      $                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
 1205:                   CALL DLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
 1206:      $                         A( 1, I ), LDA )
 1207:    30          CONTINUE
 1208: *
 1209:             ELSE IF( WNTQS ) THEN
 1210: *
 1211: *              Path 3t (N >> M, JOBZ='S')
 1212: *              M right singular vectors to be computed in VT and
 1213: *              M left singular vectors to be computed in U
 1214: *
 1215:                IL = 1
 1216: *
 1217: *              WORK(IL) is M by M
 1218: *
 1219:                LDWRKL = M
 1220:                ITAU = IL + LDWRKL*M
 1221:                NWORK = ITAU + M
 1222: *
 1223: *              Compute A=L*Q
 1224: *              Workspace: need   M*M [L] + M [tau] + M    [work]
 1225: *              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
 1226: *
 1227:                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
 1228:      $                      LWORK - NWORK + 1, IERR )
 1229: *
 1230: *              Copy L to WORK(IL), zeroing out above it
 1231: *
 1232:                CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
 1233:                CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO,
 1234:      $                      WORK( IL + LDWRKL ), LDWRKL )
 1235: *
 1236: *              Generate Q in A
 1237: *              Workspace: need   M*M [L] + M [tau] + M    [work]
 1238: *              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
 1239: *
 1240:                CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
 1241:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1242:                IE = ITAU
 1243:                ITAUQ = IE + M
 1244:                ITAUP = ITAUQ + M
 1245:                NWORK = ITAUP + M
 1246: *
 1247: *              Bidiagonalize L in WORK(IU).
 1248: *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work]
 1249: *              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
 1250: *
 1251:                CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
 1252:      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
 1253:      $                      LWORK - NWORK + 1, IERR )
 1254: *
 1255: *              Perform bidiagonal SVD, computing left singular vectors
 1256: *              of bidiagonal matrix in U and computing right singular
 1257: *              vectors of bidiagonal matrix in VT
 1258: *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC
 1259: *
 1260:                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
 1261:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
 1262:      $                      INFO )
 1263: *
 1264: *              Overwrite U by left singular vectors of L and VT
 1265: *              by right singular vectors of L
 1266: *              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work]
 1267: *              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
 1268: *
 1269:                CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
 1270:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1271:      $                      LWORK - NWORK + 1, IERR )
 1272:                CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
 1273:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
 1274:      $                      LWORK - NWORK + 1, IERR )
 1275: *
 1276: *              Multiply right singular vectors of L in WORK(IL) by
 1277: *              Q in A, storing result in VT
 1278: *              Workspace: need   M*M [L]
 1279: *
 1280:                CALL DLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
 1281:                CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
 1282:      $                     A, LDA, ZERO, VT, LDVT )
 1283: *
 1284:             ELSE IF( WNTQA ) THEN
 1285: *
 1286: *              Path 4t (N >> M, JOBZ='A')
 1287: *              N right singular vectors to be computed in VT and
 1288: *              M left singular vectors to be computed in U
 1289: *
 1290:                IVT = 1
 1291: *
 1292: *              WORK(IVT) is M by M
 1293: *
 1294:                LDWKVT = M
 1295:                ITAU = IVT + LDWKVT*M
 1296:                NWORK = ITAU + M
 1297: *
 1298: *              Compute A=L*Q, copying result to VT
 1299: *              Workspace: need   M*M [VT] + M [tau] + M    [work]
 1300: *              Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
 1301: *
 1302:                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
 1303:      $                      LWORK - NWORK + 1, IERR )
 1304:                CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 1305: *
 1306: *              Generate Q in VT
 1307: *              Workspace: need   M*M [VT] + M [tau] + N    [work]
 1308: *              Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
 1309: *
 1310:                CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
 1311:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1312: *
 1313: *              Produce L in A, zeroing out other entries
 1314: *
 1315:                CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
 1316:                IE = ITAU
 1317:                ITAUQ = IE + M
 1318:                ITAUP = ITAUQ + M
 1319:                NWORK = ITAUP + M
 1320: *
 1321: *              Bidiagonalize L in A
 1322: *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work]
 1323: *              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
 1324: *
 1325:                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
 1326:      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
 1327:      $                      IERR )
 1328: *
 1329: *              Perform bidiagonal SVD, computing left singular vectors
 1330: *              of bidiagonal matrix in U and computing right singular
 1331: *              vectors of bidiagonal matrix in WORK(IVT)
 1332: *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
 1333: *
 1334:                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
 1335:      $                      WORK( IVT ), LDWKVT, DUM, IDUM,
 1336:      $                      WORK( NWORK ), IWORK, INFO )
 1337: *
 1338: *              Overwrite U by left singular vectors of L and WORK(IVT)
 1339: *              by right singular vectors of L
 1340: *              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work]
 1341: *              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
 1342: *
 1343:                CALL DORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
 1344:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1345:      $                      LWORK - NWORK + 1, IERR )
 1346:                CALL DORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
 1347:      $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
 1348:      $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1349: *
 1350: *              Multiply right singular vectors of L in WORK(IVT) by
 1351: *              Q in VT, storing result in A
 1352: *              Workspace: need   M*M [VT]
 1353: *
 1354:                CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
 1355:      $                     VT, LDVT, ZERO, A, LDA )
 1356: *
 1357: *              Copy right singular vectors of A from A to VT
 1358: *
 1359:                CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
 1360: *
 1361:             END IF
 1362: *
 1363:          ELSE
 1364: *
 1365: *           N .LT. MNTHR
 1366: *
 1367: *           Path 5t (N > M, but not much larger)
 1368: *           Reduce to bidiagonal form without LQ decomposition
 1369: *
 1370:             IE = 1
 1371:             ITAUQ = IE + M
 1372:             ITAUP = ITAUQ + M
 1373:             NWORK = ITAUP + M
 1374: *
 1375: *           Bidiagonalize A
 1376: *           Workspace: need   3*M [e, tauq, taup] + N        [work]
 1377: *           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
 1378: *
 1379:             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
 1380:      $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
 1381:      $                   IERR )
 1382:             IF( WNTQN ) THEN
 1383: *
 1384: *              Path 5tn (N > M, JOBZ='N')
 1385: *              Perform bidiagonal SVD, only computing singular values
 1386: *              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 1387: *
 1388:                CALL DBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
 1389:      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
 1390:             ELSE IF( WNTQO ) THEN
 1391: *              Path 5to (N > M, JOBZ='O')
 1392:                LDWKVT = M
 1393:                IVT = NWORK
 1394:                IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
 1395: *
 1396: *                 WORK( IVT ) is M by N
 1397: *
 1398:                   CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
 1399:      $                         LDWKVT )
 1400:                   NWORK = IVT + LDWKVT*N
 1401: *                 IL is unused; silence compile warnings
 1402:                   IL = -1
 1403:                ELSE
 1404: *
 1405: *                 WORK( IVT ) is M by M
 1406: *
 1407:                   NWORK = IVT + LDWKVT*M
 1408:                   IL = NWORK
 1409: *
 1410: *                 WORK(IL) is M by CHUNK
 1411: *
 1412:                   CHUNK = ( LWORK - M*M - 3*M ) / M
 1413:                END IF
 1414: *
 1415: *              Perform bidiagonal SVD, computing left singular vectors
 1416: *              of bidiagonal matrix in U and computing right singular
 1417: *              vectors of bidiagonal matrix in WORK(IVT)
 1418: *              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC
 1419: *
 1420:                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
 1421:      $                      WORK( IVT ), LDWKVT, DUM, IDUM,
 1422:      $                      WORK( NWORK ), IWORK, INFO )
 1423: *
 1424: *              Overwrite U by left singular vectors of A
 1425: *              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
 1426: *              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
 1427: *
 1428:                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
 1429:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1430:      $                      LWORK - NWORK + 1, IERR )
 1431: *
 1432:                IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
 1433: *
 1434: *                 Path 5to-fast
 1435: *                 Overwrite WORK(IVT) by left singular vectors of A
 1436: *                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work]
 1437: *                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
 1438: *
 1439:                   CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
 1440:      $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
 1441:      $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1442: *
 1443: *                 Copy right singular vectors of A from WORK(IVT) to A
 1444: *
 1445:                   CALL DLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
 1446:                ELSE
 1447: *
 1448: *                 Path 5to-slow
 1449: *                 Generate P**T in A
 1450: *                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
 1451: *                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
 1452: *
 1453:                   CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
 1454:      $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 1455: *
 1456: *                 Multiply Q in A by right singular vectors of
 1457: *                 bidiagonal matrix in WORK(IVT), storing result in
 1458: *                 WORK(IL) and copying to A
 1459: *                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
 1460: *                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L]
 1461: *
 1462:                   DO 40 I = 1, N, CHUNK
 1463:                      BLK = MIN( N - I + 1, CHUNK )
 1464:                      CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
 1465:      $                           LDWKVT, A( 1, I ), LDA, ZERO,
 1466:      $                           WORK( IL ), M )
 1467:                      CALL DLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ),
 1468:      $                            LDA )
 1469:    40             CONTINUE
 1470:                END IF
 1471:             ELSE IF( WNTQS ) THEN
 1472: *
 1473: *              Path 5ts (N > M, JOBZ='S')
 1474: *              Perform bidiagonal SVD, computing left singular vectors
 1475: *              of bidiagonal matrix in U and computing right singular
 1476: *              vectors of bidiagonal matrix in VT
 1477: *              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 1478: *
 1479:                CALL DLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
 1480:                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
 1481:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
 1482:      $                      INFO )
 1483: *
 1484: *              Overwrite U by left singular vectors of A and VT
 1485: *              by right singular vectors of A
 1486: *              Workspace: need   3*M [e, tauq, taup] + M    [work]
 1487: *              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
 1488: *
 1489:                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
 1490:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1491:      $                      LWORK - NWORK + 1, IERR )
 1492:                CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
 1493:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
 1494:      $                      LWORK - NWORK + 1, IERR )
 1495:             ELSE IF( WNTQA ) THEN
 1496: *
 1497: *              Path 5ta (N > M, JOBZ='A')
 1498: *              Perform bidiagonal SVD, computing left singular vectors
 1499: *              of bidiagonal matrix in U and computing right singular
 1500: *              vectors of bidiagonal matrix in VT
 1501: *              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 1502: *
 1503:                CALL DLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
 1504:                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
 1505:      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
 1506:      $                      INFO )
 1507: *
 1508: *              Set the right corner of VT to identity matrix
 1509: *
 1510:                IF( N.GT.M ) THEN
 1511:                   CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT(M+1,M+1),
 1512:      $                         LDVT )
 1513:                END IF
 1514: *
 1515: *              Overwrite U by left singular vectors of A and VT
 1516: *              by right singular vectors of A
 1517: *              Workspace: need   3*M [e, tauq, taup] + N    [work]
 1518: *              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
 1519: *
 1520:                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
 1521:      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
 1522:      $                      LWORK - NWORK + 1, IERR )
 1523:                CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
 1524:      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
 1525:      $                      LWORK - NWORK + 1, IERR )
 1526:             END IF
 1527: *
 1528:          END IF
 1529: *
 1530:       END IF
 1531: *
 1532: *     Undo scaling if necessary
 1533: *
 1534:       IF( ISCL.EQ.1 ) THEN
 1535:          IF( ANRM.GT.BIGNUM )
 1536:      $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
 1537:      $                   IERR )
 1538:          IF( ANRM.LT.SMLNUM )
 1539:      $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
 1540:      $                   IERR )
 1541:       END IF
 1542: *
 1543: *     Return optimal workspace in WORK(1)
 1544: *
 1545:       WORK( 1 ) = DROUNDUP_LWORK( MAXWRK )
 1546: *
 1547:       RETURN
 1548: *
 1549: *     End of DGESDD
 1550: *
 1551:       END

CVSweb interface <joel.bertrand@systella.fr>