--- rpl/lapack/lapack/dgehrd.f	2010/01/26 15:22:46	1.1
+++ rpl/lapack/lapack/dgehrd.f	2017/06/17 11:06:15	1.17
@@ -1,9 +1,176 @@
+*> \brief \b DGEHRD
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGEHRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgehrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgehrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgehrd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGEHRD reduces a real general matrix A to upper Hessenberg form H by
+*> an orthogonal similarity transformation:  Q**T * A * Q = H .
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in] ILO
+*> \verbatim
+*>          ILO is INTEGER
+*> \endverbatim
+*>
+*> \param[in] IHI
+*> \verbatim
+*>          IHI is INTEGER
+*>
+*>          It is assumed that A is already upper triangular in rows
+*>          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*>          set by a previous call to DGEBAL; otherwise they should be
+*>          set to 1 and N respectively. See Further Details.
+*>          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the N-by-N general matrix to be reduced.
+*>          On exit, the upper triangle and the first subdiagonal of A
+*>          are overwritten with the upper Hessenberg matrix H, and the
+*>          elements below the first subdiagonal, with the array TAU,
+*>          represent the orthogonal matrix Q as a product of elementary
+*>          reflectors. See Further Details.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,N).
+*> \endverbatim
+*>
+*> \param[out] TAU
+*> \verbatim
+*>          TAU is DOUBLE PRECISION array, dimension (N-1)
+*>          The scalar factors of the elementary reflectors (see Further
+*>          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*>          zero.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (LWORK)
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The length of the array WORK.  LWORK >= max(1,N).
+*>          For good performance, LWORK should generally be larger.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the WORK array, returns
+*>          this value as the first entry of the WORK array, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleGEcomputational
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The matrix Q is represented as a product of (ihi-ilo) elementary
+*>  reflectors
+*>
+*>     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*>
+*>  Each H(i) has the form
+*>
+*>     H(i) = I - tau * v * v**T
+*>
+*>  where tau is a real scalar, and v is a real vector with
+*>  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*>  exit in A(i+2:ihi,i), and tau in TAU(i).
+*>
+*>  The contents of A are illustrated by the following example, with
+*>  n = 7, ilo = 2 and ihi = 6:
+*>
+*>  on entry,                        on exit,
+*>
+*>  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*>  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*>  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*>  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*>  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*>  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*>  (                         a )    (                          a )
+*>
+*>  where a denotes an element of the original matrix A, h denotes a
+*>  modified element of the upper Hessenberg matrix H, and vi denotes an
+*>  element of the vector defining H(i).
+*>
+*>  This file is a slight modification of LAPACK-3.0's DGEHRD
+*>  subroutine incorporating improvements proposed by Quintana-Orti and
+*>  Van de Geijn (2006). (See DLAHR2.)
+*> \endverbatim
+*>
+*  =====================================================================
       SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK routine (version 3.2.1)                                  --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*  -- April 2009                                                      --
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            IHI, ILO, INFO, LDA, LWORK, N
@@ -12,114 +179,22 @@
       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * )
 *     ..
 *
-*  Purpose
-*  =======
-*
-*  DGEHRD reduces a real general matrix A to upper Hessenberg form H by
-*  an orthogonal similarity transformation:  Q' * A * Q = H .
-*
-*  Arguments
-*  =========
-*
-*  N       (input) INTEGER
-*          The order of the matrix A.  N >= 0.
-*
-*  ILO     (input) INTEGER
-*  IHI     (input) INTEGER
-*          It is assumed that A is already upper triangular in rows
-*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
-*          set by a previous call to DGEBAL; otherwise they should be
-*          set to 1 and N respectively. See Further Details.
-*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
-*
-*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
-*          On entry, the N-by-N general matrix to be reduced.
-*          On exit, the upper triangle and the first subdiagonal of A
-*          are overwritten with the upper Hessenberg matrix H, and the
-*          elements below the first subdiagonal, with the array TAU,
-*          represent the orthogonal matrix Q as a product of elementary
-*          reflectors. See Further Details.
-*
-*  LDA     (input) INTEGER
-*          The leading dimension of the array A.  LDA >= max(1,N).
-*
-*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
-*          The scalar factors of the elementary reflectors (see Further
-*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
-*          zero.
-*
-*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
-*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
-*
-*  LWORK   (input) INTEGER
-*          The length of the array WORK.  LWORK >= max(1,N).
-*          For optimum performance LWORK >= N*NB, where NB is the
-*          optimal blocksize.
-*
-*          If LWORK = -1, then a workspace query is assumed; the routine
-*          only calculates the optimal size of the WORK array, returns
-*          this value as the first entry of the WORK array, and no error
-*          message related to LWORK is issued by XERBLA.
-*
-*  INFO    (output) INTEGER
-*          = 0:  successful exit
-*          < 0:  if INFO = -i, the i-th argument had an illegal value.
-*
-*  Further Details
-*  ===============
-*
-*  The matrix Q is represented as a product of (ihi-ilo) elementary
-*  reflectors
-*
-*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
-*
-*  Each H(i) has the form
-*
-*     H(i) = I - tau * v * v'
-*
-*  where tau is a real scalar, and v is a real vector with
-*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
-*  exit in A(i+2:ihi,i), and tau in TAU(i).
-*
-*  The contents of A are illustrated by the following example, with
-*  n = 7, ilo = 2 and ihi = 6:
-*
-*  on entry,                        on exit,
-*
-*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
-*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
-*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
-*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
-*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
-*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
-*  (                         a )    (                          a )
-*
-*  where a denotes an element of the original matrix A, h denotes a
-*  modified element of the upper Hessenberg matrix H, and vi denotes an
-*  element of the vector defining H(i).
-*
-*  This file is a slight modification of LAPACK-3.0's DGEHRD
-*  subroutine incorporating improvements proposed by Quintana-Orti and
-*  Van de Geijn (2006). (See DLAHR2.)
-*
 *  =====================================================================
 *
 *     .. Parameters ..
-      INTEGER            NBMAX, LDT
-      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      INTEGER            NBMAX, LDT, TSIZE
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1,
+     $                     TSIZE = LDT*NBMAX )
       DOUBLE PRECISION  ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D+0, 
+      PARAMETER          ( ZERO = 0.0D+0,
      $                     ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LQUERY
-      INTEGER            I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+      INTEGER            I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB,
      $                   NBMIN, NH, NX
       DOUBLE PRECISION  EI
 *     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION  T( LDT, NBMAX )
-*     ..
 *     .. External Subroutines ..
       EXTERNAL           DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM,
      $                   XERBLA
@@ -136,9 +211,6 @@
 *     Test the input parameters
 *
       INFO = 0
-      NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
-      LWKOPT = N*NB
-      WORK( 1 ) = LWKOPT
       LQUERY = ( LWORK.EQ.-1 )
       IF( N.LT.0 ) THEN
          INFO = -1
@@ -151,6 +223,16 @@
       ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
          INFO = -8
       END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Compute the workspace requirements
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
+         LWKOPT = N*NB + TSIZE
+         WORK( 1 ) = LWKOPT
+      END IF
+*
       IF( INFO.NE.0 ) THEN
          CALL XERBLA( 'DGEHRD', -INFO )
          RETURN
@@ -179,7 +261,6 @@
 *
       NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
       NBMIN = 2
-      IWS = 1
       IF( NB.GT.1 .AND. NB.LT.NH ) THEN
 *
 *        Determine when to cross over from blocked to unblocked code
@@ -190,8 +271,7 @@
 *
 *           Determine if workspace is large enough for blocked code
 *
-            IWS = N*NB
-            IF( LWORK.LT.IWS ) THEN
+            IF( LWORK.LT.N*NB+TSIZE ) THEN
 *
 *              Not enough workspace to use optimal NB:  determine the
 *              minimum value of NB, and reduce NB or force use of
@@ -199,8 +279,8 @@
 *
                NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI,
      $                 -1 ) )
-               IF( LWORK.GE.N*NBMIN ) THEN
-                  NB = LWORK / N
+               IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN
+                  NB = (LWORK-TSIZE) / N
                ELSE
                   NB = 1
                END IF
@@ -219,23 +299,24 @@
 *
 *        Use blocked code
 *
+         IWT = 1 + N*NB
          DO 40 I = ILO, IHI - 1 - NX, NB
             IB = MIN( NB, IHI-I )
 *
 *           Reduce columns i:i+ib-1 to Hessenberg form, returning the
-*           matrices V and T of the block reflector H = I - V*T*V'
+*           matrices V and T of the block reflector H = I - V*T*V**T
 *           which performs the reduction, and also the matrix Y = A*V*T
 *
-            CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
-     $                   WORK, LDWORK )
+            CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ),
+     $                   WORK( IWT ), LDT, WORK, LDWORK )
 *
 *           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
-*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set
 *           to 1
 *
             EI = A( I+IB, I+IB-1 )
             A( I+IB, I+IB-1 ) = ONE
-            CALL DGEMM( 'No transpose', 'Transpose', 
+            CALL DGEMM( 'No transpose', 'Transpose',
      $                  IHI, IHI-I-IB+1,
      $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
      $                  A( 1, I+IB ), LDA )
@@ -257,15 +338,16 @@
 *
             CALL DLARFB( 'Left', 'Transpose', 'Forward',
      $                   'Columnwise',
-     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
-     $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
+     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA,
+     $                   WORK( IWT ), LDT, A( I+1, I+IB ), LDA,
+     $                   WORK, LDWORK )
    40    CONTINUE
       END IF
 *
 *     Use unblocked code to reduce the rest of the matrix
 *
       CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
-      WORK( 1 ) = IWS
+      WORK( 1 ) = LWKOPT
 *
       RETURN
 *