--- rpl/lapack/lapack/dgehrd.f 2010/01/26 15:22:46 1.1.1.1 +++ rpl/lapack/lapack/dgehrd.f 2023/08/07 08:38:48 1.19 @@ -1,9 +1,173 @@ +*> \brief \b DGEHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEHRD reduces a real general matrix A to upper Hessenberg form H by +*> an orthogonal similarity transformation: Q**T * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to DGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the orthogonal matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +*> zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,N). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This file is a slight modification of LAPACK-3.0's DGEHRD +*> subroutine incorporating improvements proposed by Quintana-Orti and +*> Van de Geijn (2006). (See DLAHR2.) +*> \endverbatim +*> +* ===================================================================== SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) * -* -- LAPACK routine (version 3.2.1) -- +* -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* -- April 2009 -- * * .. Scalar Arguments .. INTEGER IHI, ILO, INFO, LDA, LWORK, N @@ -12,114 +176,22 @@ DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) * .. * -* Purpose -* ======= -* -* DGEHRD reduces a real general matrix A to upper Hessenberg form H by -* an orthogonal similarity transformation: Q' * A * Q = H . -* -* Arguments -* ========= -* -* N (input) INTEGER -* The order of the matrix A. N >= 0. -* -* ILO (input) INTEGER -* IHI (input) INTEGER -* It is assumed that A is already upper triangular in rows -* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally -* set by a previous call to DGEBAL; otherwise they should be -* set to 1 and N respectively. See Further Details. -* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. -* -* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) -* On entry, the N-by-N general matrix to be reduced. -* On exit, the upper triangle and the first subdiagonal of A -* are overwritten with the upper Hessenberg matrix H, and the -* elements below the first subdiagonal, with the array TAU, -* represent the orthogonal matrix Q as a product of elementary -* reflectors. See Further Details. -* -* LDA (input) INTEGER -* The leading dimension of the array A. LDA >= max(1,N). -* -* TAU (output) DOUBLE PRECISION array, dimension (N-1) -* The scalar factors of the elementary reflectors (see Further -* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to -* zero. -* -* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) -* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. -* -* LWORK (input) INTEGER -* The length of the array WORK. LWORK >= max(1,N). -* For optimum performance LWORK >= N*NB, where NB is the -* optimal blocksize. -* -* If LWORK = -1, then a workspace query is assumed; the routine -* only calculates the optimal size of the WORK array, returns -* this value as the first entry of the WORK array, and no error -* message related to LWORK is issued by XERBLA. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if INFO = -i, the i-th argument had an illegal value. -* -* Further Details -* =============== -* -* The matrix Q is represented as a product of (ihi-ilo) elementary -* reflectors -* -* Q = H(ilo) H(ilo+1) . . . H(ihi-1). -* -* Each H(i) has the form -* -* H(i) = I - tau * v * v' -* -* where tau is a real scalar, and v is a real vector with -* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on -* exit in A(i+2:ihi,i), and tau in TAU(i). -* -* The contents of A are illustrated by the following example, with -* n = 7, ilo = 2 and ihi = 6: -* -* on entry, on exit, -* -* ( a a a a a a a ) ( a a h h h h a ) -* ( a a a a a a ) ( a h h h h a ) -* ( a a a a a a ) ( h h h h h h ) -* ( a a a a a a ) ( v2 h h h h h ) -* ( a a a a a a ) ( v2 v3 h h h h ) -* ( a a a a a a ) ( v2 v3 v4 h h h ) -* ( a ) ( a ) -* -* where a denotes an element of the original matrix A, h denotes a -* modified element of the upper Hessenberg matrix H, and vi denotes an -* element of the vector defining H(i). -* -* This file is a slight modification of LAPACK-3.0's DGEHRD -* subroutine incorporating improvements proposed by Quintana-Orti and -* Van de Geijn (2006). (See DLAHR2.) -* * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) DOUBLE PRECISION ZERO, ONE - PARAMETER ( ZERO = 0.0D+0, + PARAMETER ( ZERO = 0.0D+0, $ ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX DOUBLE PRECISION EI * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM, $ XERBLA @@ -136,9 +208,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -151,6 +220,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'DGEHRD', -INFO ) RETURN @@ -179,7 +258,6 @@ * NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) NBMIN = 2 - IWS = 1 IF( NB.GT.1 .AND. NB.LT.NH ) THEN * * Determine when to cross over from blocked to unblocked code @@ -190,8 +268,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -199,8 +276,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -219,23 +296,24 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * * Reduce columns i:i+ib-1 to Hessenberg form, returning the -* matrices V and T of the block reflector H = I - V*T*V' +* matrices V and T of the block reflector H = I - V*T*V**T * which performs the reduction, and also the matrix Y = A*V*T * - CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the -* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set +* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set * to 1 * EI = A( I+IB, I+IB-1 ) A( I+IB, I+IB-1 ) = ONE - CALL DGEMM( 'No transpose', 'Transpose', + CALL DGEMM( 'No transpose', 'Transpose', $ IHI, IHI-I-IB+1, $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, $ A( 1, I+IB ), LDA ) @@ -257,15 +335,16 @@ * CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN *