--- rpl/lapack/lapack/dgesdd.f	2010/08/06 15:28:37	1.3
+++ rpl/lapack/lapack/dgesdd.f	2023/08/07 08:38:49	1.22
@@ -1,10 +1,226 @@
-      SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
-     $                   LWORK, IWORK, INFO )
+*> \brief \b DGESDD
 *
-*  -- LAPACK driver routine (version 3.2.1)                                  --
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGESDD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
+*                          WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          JOBZ
+*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
+*      $                   VT( LDVT, * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGESDD computes the singular value decomposition (SVD) of a real
+*> M-by-N matrix A, optionally computing the left and right singular
+*> vectors.  If singular vectors are desired, it uses a
+*> divide-and-conquer algorithm.
+*>
+*> The SVD is written
+*>
+*>      A = U * SIGMA * transpose(V)
+*>
+*> where SIGMA is an M-by-N matrix which is zero except for its
+*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+*> are the singular values of A; they are real and non-negative, and
+*> are returned in descending order.  The first min(m,n) columns of
+*> U and V are the left and right singular vectors of A.
+*>
+*> Note that the routine returns VT = V**T, not V.
+*>
+*> The divide and conquer algorithm makes very mild assumptions about
+*> floating point arithmetic. It will work on machines with a guard
+*> digit in add/subtract, or on those binary machines without guard
+*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*> without guard digits, but we know of none.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBZ
+*> \verbatim
+*>          JOBZ is CHARACTER*1
+*>          Specifies options for computing all or part of the matrix U:
+*>          = 'A':  all M columns of U and all N rows of V**T are
+*>                  returned in the arrays U and VT;
+*>          = 'S':  the first min(M,N) columns of U and the first
+*>                  min(M,N) rows of V**T are returned in the arrays U
+*>                  and VT;
+*>          = 'O':  If M >= N, the first N columns of U are overwritten
+*>                  on the array A and all rows of V**T are returned in
+*>                  the array VT;
+*>                  otherwise, all columns of U are returned in the
+*>                  array U and the first M rows of V**T are overwritten
+*>                  in the array A;
+*>          = 'N':  no columns of U or rows of V**T are computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the input matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the input matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*>          On exit,
+*>          if JOBZ = 'O',  A is overwritten with the first N columns
+*>                          of U (the left singular vectors, stored
+*>                          columnwise) if M >= N;
+*>                          A is overwritten with the first M rows
+*>                          of V**T (the right singular vectors, stored
+*>                          rowwise) otherwise.
+*>          if JOBZ .ne. 'O', the contents of A are destroyed.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension (min(M,N))
+*>          The singular values of A, sorted so that S(i) >= S(i+1).
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
+*>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+*>          UCOL = min(M,N) if JOBZ = 'S'.
+*>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+*>          orthogonal matrix U;
+*>          if JOBZ = 'S', U contains the first min(M,N) columns of U
+*>          (the left singular vectors, stored columnwise);
+*>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER
+*>          The leading dimension of the array U.  LDU >= 1; if
+*>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+*> \endverbatim
+*>
+*> \param[out] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
+*>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+*>          N-by-N orthogonal matrix V**T;
+*>          if JOBZ = 'S', VT contains the first min(M,N) rows of
+*>          V**T (the right singular vectors, stored rowwise);
+*>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDVT
+*> \verbatim
+*>          LDVT is INTEGER
+*>          The leading dimension of the array VT.  LDVT >= 1;
+*>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+*>          if JOBZ = 'S', LDVT >= min(M,N).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. LWORK >= 1.
+*>          If LWORK = -1, a workspace query is assumed.  The optimal
+*>          size for the WORK array is calculated and stored in WORK(1),
+*>          and no other work except argument checking is performed.
+*>
+*>          Let mx = max(M,N) and mn = min(M,N).
+*>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
+*>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
+*>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
+*>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
+*>          These are not tight minimums in all cases; see comments inside code.
+*>          For good performance, LWORK should generally be larger;
+*>          a query is recommended.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (8*min(M,N))
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          <  0:  if INFO = -i, the i-th argument had an illegal value.
+*>          = -4:  if A had a NAN entry.
+*>          >  0:  DBDSDC did not converge, updating process failed.
+*>          =  0:  successful exit.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup doubleGEsing
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
+     $                   WORK, LWORK, IWORK, INFO )
+      implicit none
+*
+*  -- LAPACK driver routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     March 2009
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ
@@ -16,131 +232,6 @@
      $                   VT( LDVT, * ), WORK( * )
 *     ..
 *
-*  Purpose
-*  =======
-*
-*  DGESDD computes the singular value decomposition (SVD) of a real
-*  M-by-N matrix A, optionally computing the left and right singular
-*  vectors.  If singular vectors are desired, it uses a
-*  divide-and-conquer algorithm.
-*
-*  The SVD is written
-*
-*       A = U * SIGMA * transpose(V)
-*
-*  where SIGMA is an M-by-N matrix which is zero except for its
-*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
-*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
-*  are the singular values of A; they are real and non-negative, and
-*  are returned in descending order.  The first min(m,n) columns of
-*  U and V are the left and right singular vectors of A.
-*
-*  Note that the routine returns VT = V**T, not V.
-*
-*  The divide and conquer algorithm makes very mild assumptions about
-*  floating point arithmetic. It will work on machines with a guard
-*  digit in add/subtract, or on those binary machines without guard
-*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
-*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
-*  without guard digits, but we know of none.
-*
-*  Arguments
-*  =========
-*
-*  JOBZ    (input) CHARACTER*1
-*          Specifies options for computing all or part of the matrix U:
-*          = 'A':  all M columns of U and all N rows of V**T are
-*                  returned in the arrays U and VT;
-*          = 'S':  the first min(M,N) columns of U and the first
-*                  min(M,N) rows of V**T are returned in the arrays U
-*                  and VT;
-*          = 'O':  If M >= N, the first N columns of U are overwritten
-*                  on the array A and all rows of V**T are returned in
-*                  the array VT;
-*                  otherwise, all columns of U are returned in the
-*                  array U and the first M rows of V**T are overwritten
-*                  in the array A;
-*          = 'N':  no columns of U or rows of V**T are computed.
-*
-*  M       (input) INTEGER
-*          The number of rows of the input matrix A.  M >= 0.
-*
-*  N       (input) INTEGER
-*          The number of columns of the input matrix A.  N >= 0.
-*
-*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
-*          On entry, the M-by-N matrix A.
-*          On exit,
-*          if JOBZ = 'O',  A is overwritten with the first N columns
-*                          of U (the left singular vectors, stored
-*                          columnwise) if M >= N;
-*                          A is overwritten with the first M rows
-*                          of V**T (the right singular vectors, stored
-*                          rowwise) otherwise.
-*          if JOBZ .ne. 'O', the contents of A are destroyed.
-*
-*  LDA     (input) INTEGER
-*          The leading dimension of the array A.  LDA >= max(1,M).
-*
-*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
-*          The singular values of A, sorted so that S(i) >= S(i+1).
-*
-*  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
-*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
-*          UCOL = min(M,N) if JOBZ = 'S'.
-*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
-*          orthogonal matrix U;
-*          if JOBZ = 'S', U contains the first min(M,N) columns of U
-*          (the left singular vectors, stored columnwise);
-*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
-*
-*  LDU     (input) INTEGER
-*          The leading dimension of the array U.  LDU >= 1; if
-*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
-*
-*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
-*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
-*          N-by-N orthogonal matrix V**T;
-*          if JOBZ = 'S', VT contains the first min(M,N) rows of
-*          V**T (the right singular vectors, stored rowwise);
-*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
-*
-*  LDVT    (input) INTEGER
-*          The leading dimension of the array VT.  LDVT >= 1; if
-*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
-*          if JOBZ = 'S', LDVT >= min(M,N).
-*
-*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
-*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
-*
-*  LWORK   (input) INTEGER
-*          The dimension of the array WORK. LWORK >= 1.
-*          If JOBZ = 'N',
-*            LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)).
-*          If JOBZ = 'O',
-*            LWORK >= 3*min(M,N) + 
-*                     max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
-*          If JOBZ = 'S' or 'A'
-*            LWORK >= 3*min(M,N) +
-*                     max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
-*          For good performance, LWORK should generally be larger.
-*          If LWORK = -1 but other input arguments are legal, WORK(1)
-*          returns the optimal LWORK.
-*
-*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
-*
-*  INFO    (output) INTEGER
-*          = 0:  successful exit.
-*          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*          > 0:  DBDSDC did not converge, updating process failed.
-*
-*  Further Details
-*  ===============
-*
-*  Based on contributions by
-*     Ming Gu and Huan Ren, Computer Science Division, University of
-*     California at Berkeley, USA
-*
 *  =====================================================================
 *
 *     .. Parameters ..
@@ -153,6 +244,15 @@
      $                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
      $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
      $                   MNTHR, NWORK, WRKBL
+      INTEGER            LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,
+     $                   LWORK_DGEBRD_NN, LWORK_DGELQF_MN,
+     $                   LWORK_DGEQRF_MN,
+     $                   LWORK_DORGBR_P_MM, LWORK_DORGBR_Q_NN,
+     $                   LWORK_DORGLQ_MN, LWORK_DORGLQ_NN,
+     $                   LWORK_DORGQR_MM, LWORK_DORGQR_MN,
+     $                   LWORK_DORMBR_PRT_MM, LWORK_DORMBR_QLN_MM,
+     $                   LWORK_DORMBR_PRT_MN, LWORK_DORMBR_QLN_MN,
+     $                   LWORK_DORMBR_PRT_NN, LWORK_DORMBR_QLN_NN
       DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
 *     ..
 *     .. Local Arrays ..
@@ -165,10 +265,10 @@
      $                   XERBLA
 *     ..
 *     .. External Functions ..
-      LOGICAL            LSAME
-      INTEGER            ILAENV
-      DOUBLE PRECISION   DLAMCH, DLANGE
-      EXTERNAL           DLAMCH, DLANGE, ILAENV, LSAME
+      LOGICAL            LSAME, DISNAN
+      DOUBLE PRECISION   DLAMCH, DLANGE, DROUNDUP_LWORK
+      EXTERNAL           DLAMCH, DLANGE, LSAME, DISNAN, 
+     $                   DROUNDUP_LWORK
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          INT, MAX, MIN, SQRT
@@ -177,13 +277,13 @@
 *
 *     Test the input arguments
 *
-      INFO = 0
-      MINMN = MIN( M, N )
-      WNTQA = LSAME( JOBZ, 'A' )
-      WNTQS = LSAME( JOBZ, 'S' )
+      INFO   = 0
+      MINMN  = MIN( M, N )
+      WNTQA  = LSAME( JOBZ, 'A' )
+      WNTQS  = LSAME( JOBZ, 'S' )
       WNTQAS = WNTQA .OR. WNTQS
-      WNTQO = LSAME( JOBZ, 'O' )
-      WNTQN = LSAME( JOBZ, 'N' )
+      WNTQO  = LSAME( JOBZ, 'O' )
+      WNTQN  = LSAME( JOBZ, 'N' )
       LQUERY = ( LWORK.EQ.-1 )
 *
       IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
@@ -204,115 +304,140 @@
       END IF
 *
 *     Compute workspace
-*      (Note: Comments in the code beginning "Workspace:" describe the
-*       minimal amount of workspace needed at that point in the code,
+*       Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace allocated at that point in the code,
 *       as well as the preferred amount for good performance.
 *       NB refers to the optimal block size for the immediately
-*       following subroutine, as returned by ILAENV.)
+*       following subroutine, as returned by ILAENV.
 *
       IF( INFO.EQ.0 ) THEN
          MINWRK = 1
          MAXWRK = 1
+         BDSPAC = 0
+         MNTHR  = INT( MINMN*11.0D0 / 6.0D0 )
          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
 *
 *           Compute space needed for DBDSDC
 *
-            MNTHR = INT( MINMN*11.0D0 / 6.0D0 )
             IF( WNTQN ) THEN
+*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
+*              keep 7*N for backwards compatibility.
                BDSPAC = 7*N
             ELSE
                BDSPAC = 3*N*N + 4*N
             END IF
+*
+*           Compute space preferred for each routine
+            CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
+     $                   DUM(1), DUM(1), -1, IERR )
+            LWORK_DGEBRD_MN = INT( DUM(1) )
+*
+            CALL DGEBRD( N, N, DUM(1), N, DUM(1), DUM(1), DUM(1),
+     $                   DUM(1), DUM(1), -1, IERR )
+            LWORK_DGEBRD_NN = INT( DUM(1) )
+*
+            CALL DGEQRF( M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
+            LWORK_DGEQRF_MN = INT( DUM(1) )
+*
+            CALL DORGBR( 'Q', N, N, N, DUM(1), N, DUM(1), DUM(1), -1,
+     $                   IERR )
+            LWORK_DORGBR_Q_NN = INT( DUM(1) )
+*
+            CALL DORGQR( M, M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
+            LWORK_DORGQR_MM = INT( DUM(1) )
+*
+            CALL DORGQR( M, N, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
+            LWORK_DORGQR_MN = INT( DUM(1) )
+*
+            CALL DORMBR( 'P', 'R', 'T', N, N, N, DUM(1), N,
+     $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
+            LWORK_DORMBR_PRT_NN = INT( DUM(1) )
+*
+            CALL DORMBR( 'Q', 'L', 'N', N, N, N, DUM(1), N,
+     $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
+            LWORK_DORMBR_QLN_NN = INT( DUM(1) )
+*
+            CALL DORMBR( 'Q', 'L', 'N', M, N, N, DUM(1), M,
+     $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
+            LWORK_DORMBR_QLN_MN = INT( DUM(1) )
+*
+            CALL DORMBR( 'Q', 'L', 'N', M, M, N, DUM(1), M,
+     $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
+            LWORK_DORMBR_QLN_MM = INT( DUM(1) )
+*
             IF( M.GE.MNTHR ) THEN
                IF( WNTQN ) THEN
 *
-*                 Path 1 (M much larger than N, JOBZ='N')
+*                 Path 1 (M >> N, JOBZ='N')
 *
-                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
-     $                    -1 )
-                  WRKBL = MAX( WRKBL, 3*N+2*N*
-     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
-                  MAXWRK = MAX( WRKBL, BDSPAC+N )
+                  WRKBL = N + LWORK_DGEQRF_MN
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
+                  MAXWRK = MAX( WRKBL, BDSPAC + N )
                   MINWRK = BDSPAC + N
                ELSE IF( WNTQO ) THEN
 *
-*                 Path 2 (M much larger than N, JOBZ='O')
+*                 Path 2 (M >> N, JOBZ='O')
 *
-                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
-     $                    N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+2*N*
-     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  WRKBL = N + LWORK_DGEQRF_MN
+                  WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  WRKBL = MAX( WRKBL, 3*N + BDSPAC )
                   MAXWRK = WRKBL + 2*N*N
                   MINWRK = BDSPAC + 2*N*N + 3*N
                ELSE IF( WNTQS ) THEN
 *
-*                 Path 3 (M much larger than N, JOBZ='S')
+*                 Path 3 (M >> N, JOBZ='S')
 *
-                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
-     $                    N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+2*N*
-     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  WRKBL = N + LWORK_DGEQRF_MN
+                  WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  WRKBL = MAX( WRKBL, 3*N + BDSPAC )
                   MAXWRK = WRKBL + N*N
                   MINWRK = BDSPAC + N*N + 3*N
                ELSE IF( WNTQA ) THEN
 *
-*                 Path 4 (M much larger than N, JOBZ='A')
+*                 Path 4 (M >> N, JOBZ='A')
 *
-                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
-     $                    M, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+2*N*
-     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  WRKBL = N + LWORK_DGEQRF_MN
+                  WRKBL = MAX( WRKBL,   N + LWORK_DORGQR_MM )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  WRKBL = MAX( WRKBL, 3*N + BDSPAC )
                   MAXWRK = WRKBL + N*N
-                  MINWRK = BDSPAC + N*N + 3*N
+                  MINWRK = N*N + MAX( 3*N + BDSPAC, N + M )
                END IF
             ELSE
 *
-*              Path 5 (M at least N, but not much larger)
+*              Path 5 (M >= N, but not much larger)
 *
-               WRKBL = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1,
-     $                 -1 )
+               WRKBL = 3*N + LWORK_DGEBRD_MN
                IF( WNTQN ) THEN
-                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+*                 Path 5n (M >= N, jobz='N')
+                  MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
                   MINWRK = 3*N + MAX( M, BDSPAC )
                ELSE IF( WNTQO ) THEN
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+*                 Path 5o (M >= N, jobz='O')
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN )
+                  WRKBL = MAX( WRKBL, 3*N + BDSPAC )
                   MAXWRK = WRKBL + M*N
-                  MINWRK = 3*N + MAX( M, N*N+BDSPAC )
+                  MINWRK = 3*N + MAX( M, N*N + BDSPAC )
                ELSE IF( WNTQS ) THEN
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+*                 Path 5s (M >= N, jobz='S')
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
                   MINWRK = 3*N + MAX( M, BDSPAC )
                ELSE IF( WNTQA ) THEN
-                  WRKBL = MAX( WRKBL, 3*N+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*N+N*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
-                  MAXWRK = MAX( MAXWRK, BDSPAC+3*N )
+*                 Path 5a (M >= N, jobz='A')
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN )
+                  MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
                   MINWRK = 3*N + MAX( M, BDSPAC )
                END IF
             END IF
@@ -320,108 +445,131 @@
 *
 *           Compute space needed for DBDSDC
 *
-            MNTHR = INT( MINMN*11.0D0 / 6.0D0 )
             IF( WNTQN ) THEN
+*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
+*              keep 7*N for backwards compatibility.
                BDSPAC = 7*M
             ELSE
                BDSPAC = 3*M*M + 4*M
             END IF
+*
+*           Compute space preferred for each routine
+            CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
+     $                   DUM(1), DUM(1), -1, IERR )
+            LWORK_DGEBRD_MN = INT( DUM(1) )
+*
+            CALL DGEBRD( M, M, A, M, S, DUM(1), DUM(1),
+     $                   DUM(1), DUM(1), -1, IERR )
+            LWORK_DGEBRD_MM = INT( DUM(1) )
+*
+            CALL DGELQF( M, N, A, M, DUM(1), DUM(1), -1, IERR )
+            LWORK_DGELQF_MN = INT( DUM(1) )
+*
+            CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
+            LWORK_DORGLQ_NN = INT( DUM(1) )
+*
+            CALL DORGLQ( M, N, M, A, M, DUM(1), DUM(1), -1, IERR )
+            LWORK_DORGLQ_MN = INT( DUM(1) )
+*
+            CALL DORGBR( 'P', M, M, M, A, N, DUM(1), DUM(1), -1, IERR )
+            LWORK_DORGBR_P_MM = INT( DUM(1) )
+*
+            CALL DORMBR( 'P', 'R', 'T', M, M, M, DUM(1), M,
+     $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
+            LWORK_DORMBR_PRT_MM = INT( DUM(1) )
+*
+            CALL DORMBR( 'P', 'R', 'T', M, N, M, DUM(1), M,
+     $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
+            LWORK_DORMBR_PRT_MN = INT( DUM(1) )
+*
+            CALL DORMBR( 'P', 'R', 'T', N, N, M, DUM(1), N,
+     $                   DUM(1), DUM(1), N, DUM(1), -1, IERR )
+            LWORK_DORMBR_PRT_NN = INT( DUM(1) )
+*
+            CALL DORMBR( 'Q', 'L', 'N', M, M, M, DUM(1), M,
+     $                   DUM(1), DUM(1), M, DUM(1), -1, IERR )
+            LWORK_DORMBR_QLN_MM = INT( DUM(1) )
+*
             IF( N.GE.MNTHR ) THEN
                IF( WNTQN ) THEN
 *
-*                 Path 1t (N much larger than M, JOBZ='N')
+*                 Path 1t (N >> M, JOBZ='N')
 *
-                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
-     $                    -1 )
-                  WRKBL = MAX( WRKBL, 3*M+2*M*
-     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
-                  MAXWRK = MAX( WRKBL, BDSPAC+M )
+                  WRKBL = M + LWORK_DGELQF_MN
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
+                  MAXWRK = MAX( WRKBL, BDSPAC + M )
                   MINWRK = BDSPAC + M
                ELSE IF( WNTQO ) THEN
 *
-*                 Path 2t (N much larger than M, JOBZ='O')
+*                 Path 2t (N >> M, JOBZ='O')
 *
-                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
-     $                    N, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+2*M*
-     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  WRKBL = M + LWORK_DGELQF_MN
+                  WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_MN )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
+                  WRKBL = MAX( WRKBL, 3*M + BDSPAC )
                   MAXWRK = WRKBL + 2*M*M
                   MINWRK = BDSPAC + 2*M*M + 3*M
                ELSE IF( WNTQS ) THEN
 *
-*                 Path 3t (N much larger than M, JOBZ='S')
+*                 Path 3t (N >> M, JOBZ='S')
 *
-                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
-     $                    N, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+2*M*
-     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  WRKBL = M + LWORK_DGELQF_MN
+                  WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_MN )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
+                  WRKBL = MAX( WRKBL, 3*M + BDSPAC )
                   MAXWRK = WRKBL + M*M
                   MINWRK = BDSPAC + M*M + 3*M
                ELSE IF( WNTQA ) THEN
 *
-*                 Path 4t (N much larger than M, JOBZ='A')
+*                 Path 4t (N >> M, JOBZ='A')
 *
-                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
-                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
-     $                    N, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+2*M*
-     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  WRKBL = M + LWORK_DGELQF_MN
+                  WRKBL = MAX( WRKBL,   M + LWORK_DORGLQ_NN )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM )
+                  WRKBL = MAX( WRKBL, 3*M + BDSPAC )
                   MAXWRK = WRKBL + M*M
-                  MINWRK = BDSPAC + M*M + 3*M
+                  MINWRK = M*M + MAX( 3*M + BDSPAC, M + N )
                END IF
             ELSE
 *
-*              Path 5t (N greater than M, but not much larger)
+*              Path 5t (N > M, but not much larger)
 *
-               WRKBL = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1,
-     $                 -1 )
+               WRKBL = 3*M + LWORK_DGEBRD_MN
                IF( WNTQN ) THEN
-                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+*                 Path 5tn (N > M, jobz='N')
+                  MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
                   MINWRK = 3*M + MAX( N, BDSPAC )
                ELSE IF( WNTQO ) THEN
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) )
-                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+*                 Path 5to (N > M, jobz='O')
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN )
+                  WRKBL = MAX( WRKBL, 3*M + BDSPAC )
                   MAXWRK = WRKBL + M*N
-                  MINWRK = 3*M + MAX( N, M*M+BDSPAC )
+                  MINWRK = 3*M + MAX( N, M*M + BDSPAC )
                ELSE IF( WNTQS ) THEN
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) )
-                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+*                 Path 5ts (N > M, jobz='S')
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN )
+                  MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
                   MINWRK = 3*M + MAX( N, BDSPAC )
                ELSE IF( WNTQA ) THEN
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
-                  WRKBL = MAX( WRKBL, 3*M+M*
-     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, M, -1 ) )
-                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+*                 Path 5ta (N > M, jobz='A')
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_NN )
+                  MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
                   MINWRK = 3*M + MAX( N, BDSPAC )
                END IF
             END IF
          END IF
+
          MAXWRK = MAX( MAXWRK, MINWRK )
-         WORK( 1 ) = MAXWRK
+         WORK( 1 ) = DROUNDUP_LWORK( MAXWRK )
 *
          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
             INFO = -12
@@ -450,6 +598,10 @@
 *     Scale A if max element outside range [SMLNUM,BIGNUM]
 *
       ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
+      IF( DISNAN( ANRM ) ) THEN
+          INFO = -4
+          RETURN
+      END IF
       ISCL = 0
       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
          ISCL = 1
@@ -469,17 +621,18 @@
 *
             IF( WNTQN ) THEN
 *
-*              Path 1 (M much larger than N, JOBZ='N')
+*              Path 1 (M >> N, JOBZ='N')
 *              No singular vectors to be computed
 *
                ITAU = 1
                NWORK = ITAU + N
 *
 *              Compute A=Q*R
-*              (Workspace: need 2*N, prefer N+N*NB)
+*              Workspace: need   N [tau] + N    [work]
+*              Workspace: prefer N [tau] + N*NB [work]
 *
                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Zero out below R
 *
@@ -490,7 +643,8 @@
                NWORK = ITAUP + N
 *
 *              Bidiagonalize R in A
-*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*              Workspace: need   3*N [e, tauq, taup] + N      [work]
+*              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
 *
                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
@@ -498,14 +652,14 @@
                NWORK = IE + N
 *
 *              Perform bidiagonal SVD, computing singular values only
-*              (Workspace: need N+BDSPAC)
+*              Workspace: need   N [e] + BDSPAC
 *
                CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
 *
             ELSE IF( WNTQO ) THEN
 *
-*              Path 2 (M much larger than N, JOBZ = 'O')
+*              Path 2 (M >> N, JOBZ = 'O')
 *              N left singular vectors to be overwritten on A and
 *              N right singular vectors to be computed in VT
 *
@@ -513,42 +667,45 @@
 *
 *              WORK(IR) is LDWRKR by N
 *
-               IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN
+               IF( LWORK .GE. LDA*N + N*N + 3*N + BDSPAC ) THEN
                   LDWRKR = LDA
                ELSE
-                  LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N
+                  LDWRKR = ( LWORK - N*N - 3*N - BDSPAC ) / N
                END IF
                ITAU = IR + LDWRKR*N
                NWORK = ITAU + N
 *
 *              Compute A=Q*R
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [R] + N [tau] + N    [work]
+*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
 *
                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Copy R to WORK(IR), zeroing out below it
 *
                CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
-               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+               CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
      $                      LDWRKR )
 *
 *              Generate Q in A
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [R] + N [tau] + N    [work]
+*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
 *
                CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
                IE = ITAU
                ITAUQ = IE + N
                ITAUP = ITAUQ + N
                NWORK = ITAUP + N
 *
-*              Bidiagonalize R in VT, copying result to WORK(IR)
-*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*              Bidiagonalize R in WORK(IR)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
+*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
 *
                CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              WORK(IU) is N by N
 *
@@ -558,7 +715,7 @@
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in WORK(IU) and computing right
 *              singular vectors of bidiagonal matrix in VT
-*              (Workspace: need N+N*N+BDSPAC)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
      $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
@@ -566,21 +723,23 @@
 *
 *              Overwrite WORK(IU) by left singular vectors of R
 *              and VT by right singular vectors of R
-*              (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work]
+*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
      $                      WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Multiply Q in A by left singular vectors of R in
 *              WORK(IU), storing result in WORK(IR) and copying to A
-*              (Workspace: need 2*N*N, prefer N*N+M*N)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U]
+*              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
 *
                DO 10 I = 1, M, LDWRKR
-                  CHUNK = MIN( M-I+1, LDWRKR )
+                  CHUNK = MIN( M - I + 1, LDWRKR )
                   CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
      $                        LDA, WORK( IU ), N, ZERO, WORK( IR ),
      $                        LDWRKR )
@@ -590,7 +749,7 @@
 *
             ELSE IF( WNTQS ) THEN
 *
-*              Path 3 (M much larger than N, JOBZ='S')
+*              Path 3 (M >> N, JOBZ='S')
 *              N left singular vectors to be computed in U and
 *              N right singular vectors to be computed in VT
 *
@@ -603,38 +762,41 @@
                NWORK = ITAU + N
 *
 *              Compute A=Q*R
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [R] + N [tau] + N    [work]
+*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
 *
                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Copy R to WORK(IR), zeroing out below it
 *
                CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
-               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+               CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
      $                      LDWRKR )
 *
 *              Generate Q in A
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [R] + N [tau] + N    [work]
+*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
 *
                CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
                IE = ITAU
                ITAUQ = IE + N
                ITAUP = ITAUQ + N
                NWORK = ITAUP + N
 *
 *              Bidiagonalize R in WORK(IR)
-*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
+*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
 *
                CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagoal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need N+BDSPAC)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
@@ -642,19 +804,20 @@
 *
 *              Overwrite U by left singular vectors of R and VT
 *              by right singular vectors of R
-*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work]
+*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
                CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Multiply Q in A by left singular vectors of R in
 *              WORK(IR), storing result in U
-*              (Workspace: need N*N)
+*              Workspace: need   N*N [R]
 *
                CALL DLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
                CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
@@ -662,7 +825,7 @@
 *
             ELSE IF( WNTQA ) THEN
 *
-*              Path 4 (M much larger than N, JOBZ='A')
+*              Path 4 (M >> N, JOBZ='A')
 *              M left singular vectors to be computed in U and
 *              N right singular vectors to be computed in VT
 *
@@ -675,16 +838,18 @@
                NWORK = ITAU + N
 *
 *              Compute A=Q*R, copying result to U
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [U] + N [tau] + N    [work]
+*              Workspace: prefer N*N [U] + N [tau] + N*NB [work]
 *
                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *              Generate Q in U
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   N*N [U] + N [tau] + M    [work]
+*              Workspace: prefer N*N [U] + N [tau] + M*NB [work]
                CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *              Produce R in A, zeroing out other entries
 *
@@ -695,7 +860,8 @@
                NWORK = ITAUP + N
 *
 *              Bidiagonalize R in A
-*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work]
+*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
 *
                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
@@ -704,7 +870,7 @@
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in WORK(IU) and computing right
 *              singular vectors of bidiagonal matrix in VT
-*              (Workspace: need N+N*N+BDSPAC)
+*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
      $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
@@ -712,18 +878,19 @@
 *
 *              Overwrite WORK(IU) by left singular vectors of R and VT
 *              by right singular vectors of R
-*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work]
+*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
      $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Multiply Q in U by left singular vectors of R in
 *              WORK(IU), storing result in A
-*              (Workspace: need N*N)
+*              Workspace: need   N*N [U]
 *
                CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
      $                     LDWRKU, ZERO, A, LDA )
@@ -738,7 +905,7 @@
 *
 *           M .LT. MNTHR
 *
-*           Path 5 (M at least N, but not much larger)
+*           Path 5 (M >= N, but not much larger)
 *           Reduce to bidiagonal form without QR decomposition
 *
             IE = 1
@@ -747,21 +914,24 @@
             NWORK = ITAUP + N
 *
 *           Bidiagonalize A
-*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*           Workspace: need   3*N [e, tauq, taup] + M        [work]
+*           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
 *
             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
      $                   IERR )
             IF( WNTQN ) THEN
 *
+*              Path 5n (M >= N, JOBZ='N')
 *              Perform bidiagonal SVD, only computing singular values
-*              (Workspace: need N+BDSPAC)
+*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
             ELSE IF( WNTQO ) THEN
+*              Path 5o (M >= N, JOBZ='O')
                IU = NWORK
-               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+               IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
 *
 *                 WORK( IU ) is M by N
 *
@@ -769,6 +939,8 @@
                   NWORK = IU + LDWRKU*N
                   CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
      $                         LDWRKU )
+*                 IR is unused; silence compile warnings
+                  IR = -1
                ELSE
 *
 *                 WORK( IU ) is N by N
@@ -779,53 +951,59 @@
 *                 WORK(IR) is LDWRKR by N
 *
                   IR = NWORK
-                  LDWRKR = ( LWORK-N*N-3*N ) / N
+                  LDWRKR = ( LWORK - N*N - 3*N ) / N
                END IF
                NWORK = IU + LDWRKU*N
 *
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in WORK(IU) and computing right
 *              singular vectors of bidiagonal matrix in VT
-*              (Workspace: need N+N*N+BDSPAC)
+*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
      $                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
      $                      IWORK, INFO )
 *
 *              Overwrite VT by right singular vectors of A
-*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
+*              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
 *
                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
-               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+               IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
 *
+*                 Path 5o-fast
 *                 Overwrite WORK(IU) by left singular vectors of A
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work]
+*                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
 *
                   CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
      $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
-     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *                 Copy left singular vectors of A from WORK(IU) to A
 *
                   CALL DLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
                ELSE
 *
+*                 Path 5o-slow
 *                 Generate Q in A
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
+*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
 *
                   CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
-     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *                 Multiply Q in A by left singular vectors of
 *                 bidiagonal matrix in WORK(IU), storing result in
 *                 WORK(IR) and copying to A
-*                 (Workspace: need 2*N*N, prefer N*N+M*N)
+*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R]
+*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R]
 *
                   DO 20 I = 1, M, LDWRKR
-                     CHUNK = MIN( M-I+1, LDWRKR )
+                     CHUNK = MIN( M - I + 1, LDWRKR )
                      CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
      $                           LDA, WORK( IU ), LDWRKU, ZERO,
      $                           WORK( IR ), LDWRKR )
@@ -836,10 +1014,11 @@
 *
             ELSE IF( WNTQS ) THEN
 *
+*              Path 5s (M >= N, JOBZ='S')
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need N+BDSPAC)
+*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
 *
                CALL DLASET( 'F', M, N, ZERO, ZERO, U, LDU )
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
@@ -848,20 +1027,22 @@
 *
 *              Overwrite U by left singular vectors of A and VT
 *              by right singular vectors of A
-*              (Workspace: need 3*N, prefer 2*N+N*NB)
+*              Workspace: need   3*N [e, tauq, taup] + N    [work]
+*              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
             ELSE IF( WNTQA ) THEN
 *
+*              Path 5a (M >= N, JOBZ='A')
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need N+BDSPAC)
+*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
 *
                CALL DLASET( 'F', M, M, ZERO, ZERO, U, LDU )
                CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
@@ -871,20 +1052,21 @@
 *              Set the right corner of U to identity matrix
 *
                IF( M.GT.N ) THEN
-                  CALL DLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ),
+                  CALL DLASET( 'F', M - N, M - N, ZERO, ONE, U(N+1,N+1),
      $                         LDU )
                END IF
 *
 *              Overwrite U by left singular vectors of A and VT
 *              by right singular vectors of A
-*              (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
+*              Workspace: need   3*N [e, tauq, taup] + M    [work]
+*              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
             END IF
 *
          END IF
@@ -899,17 +1081,18 @@
 *
             IF( WNTQN ) THEN
 *
-*              Path 1t (N much larger than M, JOBZ='N')
+*              Path 1t (N >> M, JOBZ='N')
 *              No singular vectors to be computed
 *
                ITAU = 1
                NWORK = ITAU + M
 *
 *              Compute A=L*Q
-*              (Workspace: need 2*M, prefer M+M*NB)
+*              Workspace: need   M [tau] + M [work]
+*              Workspace: prefer M [tau] + M*NB [work]
 *
                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Zero out above L
 *
@@ -920,7 +1103,8 @@
                NWORK = ITAUP + M
 *
 *              Bidiagonalize L in A
-*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*              Workspace: need   3*M [e, tauq, taup] + M      [work]
+*              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
 *
                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
@@ -928,68 +1112,69 @@
                NWORK = IE + M
 *
 *              Perform bidiagonal SVD, computing singular values only
-*              (Workspace: need M+BDSPAC)
+*              Workspace: need   M [e] + BDSPAC
 *
                CALL DBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
 *
             ELSE IF( WNTQO ) THEN
 *
-*              Path 2t (N much larger than M, JOBZ='O')
+*              Path 2t (N >> M, JOBZ='O')
 *              M right singular vectors to be overwritten on A and
 *              M left singular vectors to be computed in U
 *
                IVT = 1
 *
-*              IVT is M by M
+*              WORK(IVT) is M by M
+*              WORK(IL)  is M by M; it is later resized to M by chunk for gemm
 *
                IL = IVT + M*M
-               IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN
-*
-*                 WORK(IL) is M by N
-*
+               IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN
                   LDWRKL = M
                   CHUNK = N
                ELSE
                   LDWRKL = M
-                  CHUNK = ( LWORK-M*M ) / M
+                  CHUNK = ( LWORK - M*M ) / M
                END IF
                ITAU = IL + LDWRKL*M
                NWORK = ITAU + M
 *
 *              Compute A=L*Q
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
+*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
 *
                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Copy L to WORK(IL), zeroing about above it
 *
                CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
-               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
-     $                      WORK( IL+LDWRKL ), LDWRKL )
+               CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO,
+     $                      WORK( IL + LDWRKL ), LDWRKL )
 *
 *              Generate Q in A
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
+*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
 *
                CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
                IE = ITAU
                ITAUQ = IE + M
                ITAUP = ITAUQ + M
                NWORK = ITAUP + M
 *
 *              Bidiagonalize L in WORK(IL)
-*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work]
+*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
 *
                CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U, and computing right singular
 *              vectors of bidiagonal matrix in WORK(IVT)
-*              (Workspace: need M+M*M+BDSPAC)
+*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
      $                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
@@ -997,21 +1182,24 @@
 *
 *              Overwrite U by left singular vectors of L and WORK(IVT)
 *              by right singular vectors of L
-*              (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work]
+*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
      $                      WORK( ITAUP ), WORK( IVT ), M,
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *              Multiply right singular vectors of L in WORK(IVT) by Q
 *              in A, storing result in WORK(IL) and copying to A
-*              (Workspace: need 2*M*M, prefer M*M+M*N)
+*              Workspace: need   M*M [VT] + M*M [L]
+*              Workspace: prefer M*M [VT] + M*N [L]
+*              At this point, L is resized as M by chunk.
 *
                DO 30 I = 1, N, CHUNK
-                  BLK = MIN( N-I+1, CHUNK )
+                  BLK = MIN( N - I + 1, CHUNK )
                   CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
      $                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
                   CALL DLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
@@ -1020,7 +1208,7 @@
 *
             ELSE IF( WNTQS ) THEN
 *
-*              Path 3t (N much larger than M, JOBZ='S')
+*              Path 3t (N >> M, JOBZ='S')
 *              M right singular vectors to be computed in VT and
 *              M left singular vectors to be computed in U
 *
@@ -1033,38 +1221,41 @@
                NWORK = ITAU + M
 *
 *              Compute A=L*Q
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [L] + M [tau] + M    [work]
+*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
 *
                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Copy L to WORK(IL), zeroing out above it
 *
                CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
-               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
-     $                      WORK( IL+LDWRKL ), LDWRKL )
+               CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO,
+     $                      WORK( IL + LDWRKL ), LDWRKL )
 *
 *              Generate Q in A
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [L] + M [tau] + M    [work]
+*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
 *
                CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
                IE = ITAU
                ITAUQ = IE + M
                ITAUP = ITAUQ + M
                NWORK = ITAUP + M
 *
-*              Bidiagonalize L in WORK(IU), copying result to U
-*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*              Bidiagonalize L in WORK(IU).
+*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work]
+*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
 *
                CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
      $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need M+BDSPAC)
+*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
      $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
@@ -1072,18 +1263,19 @@
 *
 *              Overwrite U by left singular vectors of L and VT
 *              by right singular vectors of L
-*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work]
+*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
 *              Multiply right singular vectors of L in WORK(IL) by
 *              Q in A, storing result in VT
-*              (Workspace: need M*M)
+*              Workspace: need   M*M [L]
 *
                CALL DLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
                CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
@@ -1091,7 +1283,7 @@
 *
             ELSE IF( WNTQA ) THEN
 *
-*              Path 4t (N much larger than M, JOBZ='A')
+*              Path 4t (N >> M, JOBZ='A')
 *              N right singular vectors to be computed in VT and
 *              M left singular vectors to be computed in U
 *
@@ -1104,17 +1296,19 @@
                NWORK = ITAU + M
 *
 *              Compute A=L*Q, copying result to VT
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [VT] + M [tau] + M    [work]
+*              Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
 *
                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *              Generate Q in VT
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   M*M [VT] + M [tau] + N    [work]
+*              Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
 *
                CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *              Produce L in A, zeroing out other entries
 *
@@ -1125,7 +1319,8 @@
                NWORK = ITAUP + M
 *
 *              Bidiagonalize L in A
-*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work]
+*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
 *
                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
@@ -1134,7 +1329,7 @@
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in WORK(IVT)
-*              (Workspace: need M+M*M+BDSPAC)
+*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
      $                      WORK( IVT ), LDWKVT, DUM, IDUM,
@@ -1142,18 +1337,19 @@
 *
 *              Overwrite U by left singular vectors of L and WORK(IVT)
 *              by right singular vectors of L
-*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work]
+*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
      $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
-     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *              Multiply right singular vectors of L in WORK(IVT) by
 *              Q in VT, storing result in A
-*              (Workspace: need M*M)
+*              Workspace: need   M*M [VT]
 *
                CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
      $                     VT, LDVT, ZERO, A, LDA )
@@ -1168,7 +1364,7 @@
 *
 *           N .LT. MNTHR
 *
-*           Path 5t (N greater than M, but not much larger)
+*           Path 5t (N > M, but not much larger)
 *           Reduce to bidiagonal form without LQ decomposition
 *
             IE = 1
@@ -1177,28 +1373,33 @@
             NWORK = ITAUP + M
 *
 *           Bidiagonalize A
-*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*           Workspace: need   3*M [e, tauq, taup] + N        [work]
+*           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
 *
             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
      $                   IERR )
             IF( WNTQN ) THEN
 *
+*              Path 5tn (N > M, JOBZ='N')
 *              Perform bidiagonal SVD, only computing singular values
-*              (Workspace: need M+BDSPAC)
+*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 *
                CALL DBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
      $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
             ELSE IF( WNTQO ) THEN
+*              Path 5to (N > M, JOBZ='O')
                LDWKVT = M
                IVT = NWORK
-               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+               IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
 *
 *                 WORK( IVT ) is M by N
 *
                   CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
      $                         LDWKVT )
                   NWORK = IVT + LDWKVT*N
+*                 IL is unused; silence compile warnings
+                  IL = -1
                ELSE
 *
 *                 WORK( IVT ) is M by M
@@ -1208,52 +1409,58 @@
 *
 *                 WORK(IL) is M by CHUNK
 *
-                  CHUNK = ( LWORK-M*M-3*M ) / M
+                  CHUNK = ( LWORK - M*M - 3*M ) / M
                END IF
 *
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in WORK(IVT)
-*              (Workspace: need M*M+BDSPAC)
+*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC
 *
                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
      $                      WORK( IVT ), LDWKVT, DUM, IDUM,
      $                      WORK( NWORK ), IWORK, INFO )
 *
 *              Overwrite U by left singular vectors of A
-*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
+*              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
 *
-               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+               IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
 *
+*                 Path 5to-fast
 *                 Overwrite WORK(IVT) by left singular vectors of A
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work]
+*                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
 *
                   CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
      $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
-     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *                 Copy right singular vectors of A from WORK(IVT) to A
 *
                   CALL DLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
                ELSE
 *
+*                 Path 5to-slow
 *                 Generate P**T in A
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
+*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
 *
                   CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
-     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )
 *
 *                 Multiply Q in A by right singular vectors of
 *                 bidiagonal matrix in WORK(IVT), storing result in
 *                 WORK(IL) and copying to A
-*                 (Workspace: need 2*M*M, prefer M*M+M*N)
+*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
+*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L]
 *
                   DO 40 I = 1, N, CHUNK
-                     BLK = MIN( N-I+1, CHUNK )
+                     BLK = MIN( N - I + 1, CHUNK )
                      CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
      $                           LDWKVT, A( 1, I ), LDA, ZERO,
      $                           WORK( IL ), M )
@@ -1263,10 +1470,11 @@
                END IF
             ELSE IF( WNTQS ) THEN
 *
+*              Path 5ts (N > M, JOBZ='S')
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need M+BDSPAC)
+*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 *
                CALL DLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
@@ -1275,20 +1483,22 @@
 *
 *              Overwrite U by left singular vectors of A and VT
 *              by right singular vectors of A
-*              (Workspace: need 3*M, prefer 2*M+M*NB)
+*              Workspace: need   3*M [e, tauq, taup] + M    [work]
+*              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
             ELSE IF( WNTQA ) THEN
 *
+*              Path 5ta (N > M, JOBZ='A')
 *              Perform bidiagonal SVD, computing left singular vectors
 *              of bidiagonal matrix in U and computing right singular
 *              vectors of bidiagonal matrix in VT
-*              (Workspace: need M+BDSPAC)
+*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
 *
                CALL DLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
                CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
@@ -1298,20 +1508,21 @@
 *              Set the right corner of VT to identity matrix
 *
                IF( N.GT.M ) THEN
-                  CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ),
+                  CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT(M+1,M+1),
      $                         LDVT )
                END IF
 *
 *              Overwrite U by left singular vectors of A and VT
 *              by right singular vectors of A
-*              (Workspace: need 2*M+N, prefer 2*M+N*NB)
+*              Workspace: need   3*M [e, tauq, taup] + N    [work]
+*              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
 *
                CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
      $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
                CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
      $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
-     $                      LWORK-NWORK+1, IERR )
+     $                      LWORK - NWORK + 1, IERR )
             END IF
 *
          END IF
@@ -1331,7 +1542,7 @@
 *
 *     Return optimal workspace in WORK(1)
 *
-      WORK( 1 ) = MAXWRK
+      WORK( 1 ) = DROUNDUP_LWORK( MAXWRK )
 *
       RETURN
 *