--- rpl/lapack/lapack/zlahef.f 2010/01/26 15:22:46 1.1
+++ rpl/lapack/lapack/zlahef.f 2016/08/27 15:34:57 1.16
@@ -1,9 +1,186 @@
+*> \brief \b ZLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS).
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download ZLAHEF + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*> \endhtmlonly
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+* .. Scalar Arguments ..
+* CHARACTER UPLO
+* INTEGER INFO, KB, LDA, LDW, N, NB
+* ..
+* .. Array Arguments ..
+* INTEGER IPIV( * )
+* COMPLEX*16 A( LDA, * ), W( LDW, * )
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> ZLAHEF computes a partial factorization of a complex Hermitian
+*> matrix A using the Bunch-Kaufman diagonal pivoting method. The
+*> partial factorization has the form:
+*>
+*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or:
+*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H )
+*>
+*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L'
+*> ( L21 I ) ( 0 A22 ) ( 0 I )
+*>
+*> where the order of D is at most NB. The actual order is returned in
+*> the argument KB, and is either NB or NB-1, or N if N <= NB.
+*> Note that U**H denotes the conjugate transpose of U.
+*>
+*> ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code
+*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*> A22 (if UPLO = 'L').
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*> UPLO is CHARACTER*1
+*> Specifies whether the upper or lower triangular part of the
+*> Hermitian matrix A is stored:
+*> = 'U': Upper triangular
+*> = 'L': Lower triangular
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NB
+*> \verbatim
+*> NB is INTEGER
+*> The maximum number of columns of the matrix A that should be
+*> factored. NB should be at least 2 to allow for 2-by-2 pivot
+*> blocks.
+*> \endverbatim
+*>
+*> \param[out] KB
+*> \verbatim
+*> KB is INTEGER
+*> The number of columns of A that were actually factored.
+*> KB is either NB-1 or NB, or N if N <= NB.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*> A is COMPLEX*16 array, dimension (LDA,N)
+*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
+*> n-by-n upper triangular part of A contains the upper
+*> triangular part of the matrix A, and the strictly lower
+*> triangular part of A is not referenced. If UPLO = 'L', the
+*> leading n-by-n lower triangular part of A contains the lower
+*> triangular part of the matrix A, and the strictly upper
+*> triangular part of A is not referenced.
+*> On exit, A contains details of the partial factorization.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> The leading dimension of the array A. LDA >= max(1,N).
+*> \endverbatim
+*>
+*> \param[out] IPIV
+*> \verbatim
+*> IPIV is INTEGER array, dimension (N)
+*> Details of the interchanges and the block structure of D.
+*>
+*> If UPLO = 'U':
+*> Only the last KB elements of IPIV are set.
+*>
+*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*> interchanged and D(k,k) is a 1-by-1 diagonal block.
+*>
+*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns
+*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*> is a 2-by-2 diagonal block.
+*>
+*> If UPLO = 'L':
+*> Only the first KB elements of IPIV are set.
+*>
+*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*> interchanged and D(k,k) is a 1-by-1 diagonal block.
+*>
+*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns
+*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1)
+*> is a 2-by-2 diagonal block.
+*> \endverbatim
+*>
+*> \param[out] W
+*> \verbatim
+*> W is COMPLEX*16 array, dimension (LDW,NB)
+*> \endverbatim
+*>
+*> \param[in] LDW
+*> \verbatim
+*> LDW is INTEGER
+*> The leading dimension of the array W. LDW >= max(1,N).
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*> INFO is INTEGER
+*> = 0: successful exit
+*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization
+*> has been completed, but the block diagonal matrix D is
+*> exactly singular.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2013
+*
+*> \ingroup complex16HEcomputational
+*
+*> \par Contributors:
+* ==================
+*>
+*> \verbatim
+*>
+*> November 2013, Igor Kozachenko,
+*> Computer Science Division,
+*> University of California, Berkeley
+*> \endverbatim
+*
+* =====================================================================
SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
*
-* -- LAPACK routine (version 3.2) --
+* -- LAPACK computational routine (version 3.5.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-* November 2006
+* November 2013
*
* .. Scalar Arguments ..
CHARACTER UPLO
@@ -14,85 +191,6 @@
COMPLEX*16 A( LDA, * ), W( LDW, * )
* ..
*
-* Purpose
-* =======
-*
-* ZLAHEF computes a partial factorization of a complex Hermitian
-* matrix A using the Bunch-Kaufman diagonal pivoting method. The
-* partial factorization has the form:
-*
-* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or:
-* ( 0 U22 ) ( 0 D ) ( U12' U22' )
-*
-* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L'
-* ( L21 I ) ( 0 A22 ) ( 0 I )
-*
-* where the order of D is at most NB. The actual order is returned in
-* the argument KB, and is either NB or NB-1, or N if N <= NB.
-* Note that U' denotes the conjugate transpose of U.
-*
-* ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code
-* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
-* A22 (if UPLO = 'L').
-*
-* Arguments
-* =========
-*
-* UPLO (input) CHARACTER*1
-* Specifies whether the upper or lower triangular part of the
-* Hermitian matrix A is stored:
-* = 'U': Upper triangular
-* = 'L': Lower triangular
-*
-* N (input) INTEGER
-* The order of the matrix A. N >= 0.
-*
-* NB (input) INTEGER
-* The maximum number of columns of the matrix A that should be
-* factored. NB should be at least 2 to allow for 2-by-2 pivot
-* blocks.
-*
-* KB (output) INTEGER
-* The number of columns of A that were actually factored.
-* KB is either NB-1 or NB, or N if N <= NB.
-*
-* A (input/output) COMPLEX*16 array, dimension (LDA,N)
-* On entry, the Hermitian matrix A. If UPLO = 'U', the leading
-* n-by-n upper triangular part of A contains the upper
-* triangular part of the matrix A, and the strictly lower
-* triangular part of A is not referenced. If UPLO = 'L', the
-* leading n-by-n lower triangular part of A contains the lower
-* triangular part of the matrix A, and the strictly upper
-* triangular part of A is not referenced.
-* On exit, A contains details of the partial factorization.
-*
-* LDA (input) INTEGER
-* The leading dimension of the array A. LDA >= max(1,N).
-*
-* IPIV (output) INTEGER array, dimension (N)
-* Details of the interchanges and the block structure of D.
-* If UPLO = 'U', only the last KB elements of IPIV are set;
-* if UPLO = 'L', only the first KB elements are set.
-*
-* If IPIV(k) > 0, then rows and columns k and IPIV(k) were
-* interchanged and D(k,k) is a 1-by-1 diagonal block.
-* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
-* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
-* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) =
-* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
-* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
-*
-* W (workspace) COMPLEX*16 array, dimension (LDW,NB)
-*
-* LDW (input) INTEGER
-* The leading dimension of the array W. LDW >= max(1,N).
-*
-* INFO (output) INTEGER
-* = 0: successful exit
-* > 0: if INFO = k, D(k,k) is exactly zero. The factorization
-* has been completed, but the block diagonal matrix D is
-* exactly singular.
-*
* =====================================================================
*
* .. Parameters ..
@@ -153,6 +251,8 @@
IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
$ GO TO 30
*
+ KSTEP = 1
+*
* Copy column K of A to column KW of W and update it
*
CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 )
@@ -163,15 +263,14 @@
W( K, KW ) = DBLE( W( K, KW ) )
END IF
*
- KSTEP = 1
-*
* Determine rows and columns to be interchanged and whether
* a 1-by-1 or 2-by-2 pivot block will be used
*
ABSAKK = ABS( DBLE( W( K, KW ) ) )
*
* IMAX is the row-index of the largest off-diagonal element in
-* column K, and COLMAX is its absolute value
+* column K, and COLMAX is its absolute value.
+* Determine both COLMAX and IMAX.
*
IF( K.GT.1 ) THEN
IMAX = IZAMAX( K-1, W( 1, KW ), 1 )
@@ -182,13 +281,19 @@
*
IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
*
-* Column K is zero: set INFO and continue
+* Column K is zero or underflow: set INFO and continue
*
IF( INFO.EQ.0 )
$ INFO = K
KP = K
A( K, K ) = DBLE( A( K, K ) )
ELSE
+*
+* ============================================================
+*
+* BEGIN pivot search
+*
+* Case(1)
IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
*
* no interchange, use 1-by-1 pivot block
@@ -196,6 +301,9 @@
KP = K
ELSE
*
+* BEGIN pivot search along IMAX row
+*
+*
* Copy column IMAX to column KW-1 of W and update it
*
CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
@@ -211,7 +319,8 @@
END IF
*
* JMAX is the column-index of the largest off-diagonal
-* element in row IMAX, and ROWMAX is its absolute value
+* element in row IMAX, and ROWMAX is its absolute value.
+* Determine only ROWMAX.
*
JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
ROWMAX = CABS1( W( JMAX, KW-1 ) )
@@ -220,11 +329,14 @@
ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
END IF
*
+* Case(2)
IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
*
* no interchange, use 1-by-1 pivot block
*
KP = K
+*
+* Case(3)
ELSE IF( ABS( DBLE( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX )
$ THEN
*
@@ -233,9 +345,11 @@
*
KP = IMAX
*
-* copy column KW-1 of W to column KW
+* copy column KW-1 of W to column KW of W
*
CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+*
+* Case(4)
ELSE
*
* interchange rows and columns K-1 and IMAX, use 2-by-2
@@ -244,27 +358,48 @@
KP = IMAX
KSTEP = 2
END IF
+*
+*
+* END pivot search along IMAX row
+*
END IF
*
+* END pivot search
+*
+* ============================================================
+*
+* KK is the column of A where pivoting step stopped
+*
KK = K - KSTEP + 1
+*
+* KKW is the column of W which corresponds to column KK of A
+*
KKW = NB + KK - N
*
-* Updated column KP is already stored in column KKW of W
+* Interchange rows and columns KP and KK.
+* Updated column KP is already stored in column KKW of W.
*
IF( KP.NE.KK ) THEN
*
-* Copy non-updated column KK to column KP
+* Copy non-updated column KK to column KP of submatrix A
+* at step K. No need to copy element into column K
+* (or K and K-1 for 2-by-2 pivot) of A, since these columns
+* will be later overwritten.
*
A( KP, KP ) = DBLE( A( KK, KK ) )
CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
$ LDA )
CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA )
- CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+ IF( KP.GT.1 )
+ $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
*
-* Interchange rows KK and KP in last KK columns of A and W
+* Interchange rows KK and KP in last K+1 to N columns of A
+* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
+* later overwritten). Interchange rows KK and KP
+* in last KKW to NB columns of W.
*
- IF( KK.LT.N )
- $ CALL ZSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ),
+ IF( K.LT.N )
+ $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ),
$ LDA )
CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
$ LDW )
@@ -272,40 +407,108 @@
*
IF( KSTEP.EQ.1 ) THEN
*
-* 1-by-1 pivot block D(k): column KW of W now holds
+* 1-by-1 pivot block D(k): column kw of W now holds
*
-* W(k) = U(k)*D(k)
+* W(kw) = U(k)*D(k),
*
* where U(k) is the k-th column of U
*
-* Store U(k) in column k of A
-*
+* (1) Store subdiag. elements of column U(k)
+* and 1-by-1 block D(k) in column k of A.
+* (NOTE: Diagonal element U(k,k) is a UNIT element
+* and not stored)
+* A(k,k) := D(k,k) = W(k,kw)
+* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
+*
+* (NOTE: No need to use for Hermitian matrix
+* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
+* element D(k,k) from W (potentially saves only one load))
CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
- R1 = ONE / DBLE( A( K, K ) )
- CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+ IF( K.GT.1 ) THEN
*
-* Conjugate W(k)
+* (NOTE: No need to check if A(k,k) is NOT ZERO,
+* since that was ensured earlier in pivot search:
+* case A(k,k) = 0 falls into 2x2 pivot case(4))
+*
+ R1 = ONE / DBLE( A( K, K ) )
+ CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+*
+* (2) Conjugate column W(kw)
+*
+ CALL ZLACGV( K-1, W( 1, KW ), 1 )
+ END IF
*
- CALL ZLACGV( K-1, W( 1, KW ), 1 )
ELSE
*
-* 2-by-2 pivot block D(k): columns KW and KW-1 of W now
-* hold
+* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
*
-* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
*
* where U(k) and U(k-1) are the k-th and (k-1)-th columns
* of U
*
+* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
+* block D(k-1:k,k-1:k) in columns k-1 and k of A.
+* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
+* block and not stored)
+* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
+* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
+* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
+*
IF( K.GT.2 ) THEN
*
-* Store U(k) and U(k-1) in columns k and k-1 of A
+* Factor out the columns of the inverse of 2-by-2 pivot
+* block D, so that each column contains 1, to reduce the
+* number of FLOPS when we multiply panel
+* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
+*
+* D**(-1) = ( d11 cj(d21) )**(-1) =
+* ( d21 d22 )
+*
+* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
+* ( (-d21) ( d11 ) )
+*
+* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
+*
+* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
+* ( ( -1 ) ( d11/conj(d21) ) )
+*
+* = 1/(|d21|**2) * 1/(D22*D11-1) *
+*
+* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( conj(D21)*( D11 ) D21*( -1 ) )
+* ( ( -1 ) ( D22 ) ),
+*
+* where D11 = d22/d21,
+* D22 = d11/conj(d21),
+* D21 = T/d21,
+* T = 1/(D22*D11-1).
+*
+* (NOTE: No need to check for division by ZERO,
+* since that was ensured earlier in pivot search:
+* (a) d21 != 0, since in 2x2 pivot case(4)
+* |d21| should be larger than |d11| and |d22|;
+* (b) (D22*D11 - 1) != 0, since from (a),
+* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
*
D21 = W( K-1, KW )
D11 = W( K, KW ) / DCONJG( D21 )
D22 = W( K-1, KW-1 ) / D21
T = ONE / ( DBLE( D11*D22 )-ONE )
D21 = T / D21
+*
+* Update elements in columns A(k-1) and A(k) as
+* dot products of rows of ( W(kw-1) W(kw) ) and columns
+* of D**(-1)
+*
DO 20 J = 1, K - 2
A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
A( J, K ) = DCONJG( D21 )*
@@ -319,11 +522,13 @@
A( K-1, K ) = W( K-1, KW )
A( K, K ) = W( K, KW )
*
-* Conjugate W(k) and W(k-1)
+* (2) Conjugate columns W(kw) and W(kw-1)
*
CALL ZLACGV( K-1, W( 1, KW ), 1 )
CALL ZLACGV( K-2, W( 1, KW-1 ), 1 )
+*
END IF
+*
END IF
*
* Store details of the interchanges in IPIV
@@ -344,7 +549,7 @@
*
* Update the upper triangle of A11 (= A(1:k,1:k)) as
*
-* A11 := A11 - U12*D*U12' = A11 - U12*W'
+* A11 := A11 - U12*D*U12**H = A11 - U12*W**H
*
* computing blocks of NB columns at a time (note that conjg(W) is
* actually stored)
@@ -370,20 +575,28 @@
50 CONTINUE
*
* Put U12 in standard form by partially undoing the interchanges
-* in columns k+1:n
+* in columns k+1:n looping backwards from k+1 to n
*
J = K + 1
60 CONTINUE
- JJ = J
- JP = IPIV( J )
- IF( JP.LT.0 ) THEN
- JP = -JP
+*
+* Undo the interchanges (if any) of rows JJ and JP at each
+* step J
+*
+* (Here, J is a diagonal index)
+ JJ = J
+ JP = IPIV( J )
+ IF( JP.LT.0 ) THEN
+ JP = -JP
+* (Here, J is a diagonal index)
+ J = J + 1
+ END IF
+* (NOTE: Here, J is used to determine row length. Length N-J+1
+* of the rows to swap back doesn't include diagonal element)
J = J + 1
- END IF
- J = J + 1
- IF( JP.NE.JJ .AND. J.LE.N )
- $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
- IF( J.LE.N )
+ IF( JP.NE.JJ .AND. J.LE.N )
+ $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+ IF( J.LT.N )
$ GO TO 60
*
* Set KB to the number of columns factorized
@@ -406,6 +619,8 @@
IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
$ GO TO 90
*
+ KSTEP = 1
+*
* Copy column K of A to column K of W and update it
*
W( K, K ) = DBLE( A( K, K ) )
@@ -415,15 +630,14 @@
$ W( K, 1 ), LDW, CONE, W( K, K ), 1 )
W( K, K ) = DBLE( W( K, K ) )
*
- KSTEP = 1
-*
* Determine rows and columns to be interchanged and whether
* a 1-by-1 or 2-by-2 pivot block will be used
*
ABSAKK = ABS( DBLE( W( K, K ) ) )
*
* IMAX is the row-index of the largest off-diagonal element in
-* column K, and COLMAX is its absolute value
+* column K, and COLMAX is its absolute value.
+* Determine both COLMAX and IMAX.
*
IF( K.LT.N ) THEN
IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 )
@@ -434,13 +648,19 @@
*
IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
*
-* Column K is zero: set INFO and continue
+* Column K is zero or underflow: set INFO and continue
*
IF( INFO.EQ.0 )
$ INFO = K
KP = K
A( K, K ) = DBLE( A( K, K ) )
ELSE
+*
+* ============================================================
+*
+* BEGIN pivot search
+*
+* Case(1)
IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
*
* no interchange, use 1-by-1 pivot block
@@ -448,6 +668,9 @@
KP = K
ELSE
*
+* BEGIN pivot search along IMAX row
+*
+*
* Copy column IMAX to column K+1 of W and update it
*
CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
@@ -462,7 +685,8 @@
W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) )
*
* JMAX is the column-index of the largest off-diagonal
-* element in row IMAX, and ROWMAX is its absolute value
+* element in row IMAX, and ROWMAX is its absolute value.
+* Determine only ROWMAX.
*
JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 )
ROWMAX = CABS1( W( JMAX, K+1 ) )
@@ -471,11 +695,14 @@
ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
END IF
*
+* Case(2)
IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
*
* no interchange, use 1-by-1 pivot block
*
KP = K
+*
+* Case(3)
ELSE IF( ABS( DBLE( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX )
$ THEN
*
@@ -484,9 +711,11 @@
*
KP = IMAX
*
-* copy column K+1 of W to column K
+* copy column K+1 of W to column K of W
*
CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+*
+* Case(4)
ELSE
*
* interchange rows and columns K+1 and IMAX, use 2-by-2
@@ -495,15 +724,29 @@
KP = IMAX
KSTEP = 2
END IF
+*
+*
+* END pivot search along IMAX row
+*
END IF
*
+* END pivot search
+*
+* ============================================================
+*
+* KK is the column of A where pivoting step stopped
+*
KK = K + KSTEP - 1
*
-* Updated column KP is already stored in column KK of W
+* Interchange rows and columns KP and KK.
+* Updated column KP is already stored in column KK of W.
*
IF( KP.NE.KK ) THEN
*
-* Copy non-updated column KK to column KP
+* Copy non-updated column KK to column KP of submatrix A
+* at step K. No need to copy element into column K
+* (or K and K+1 for 2-by-2 pivot) of A, since these columns
+* will be later overwritten.
*
A( KP, KP ) = DBLE( A( KK, KK ) )
CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
@@ -512,9 +755,13 @@
IF( KP.LT.N )
$ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
*
-* Interchange rows KK and KP in first KK columns of A and W
+* Interchange rows KK and KP in first K-1 columns of A
+* (columns K (or K and K+1 for 2-by-2 pivot) of A will be
+* later overwritten). Interchange rows KK and KP
+* in first KK columns of W.
*
- CALL ZSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+ IF( K.GT.1 )
+ $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
END IF
*
@@ -522,21 +769,35 @@
*
* 1-by-1 pivot block D(k): column k of W now holds
*
-* W(k) = L(k)*D(k)
+* W(k) = L(k)*D(k),
*
* where L(k) is the k-th column of L
*
-* Store L(k) in column k of A
-*
+* (1) Store subdiag. elements of column L(k)
+* and 1-by-1 block D(k) in column k of A.
+* (NOTE: Diagonal element L(k,k) is a UNIT element
+* and not stored)
+* A(k,k) := D(k,k) = W(k,k)
+* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
+*
+* (NOTE: No need to use for Hermitian matrix
+* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
+* element D(k,k) from W (potentially saves only one load))
CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
IF( K.LT.N ) THEN
+*
+* (NOTE: No need to check if A(k,k) is NOT ZERO,
+* since that was ensured earlier in pivot search:
+* case A(k,k) = 0 falls into 2x2 pivot case(4))
+*
R1 = ONE / DBLE( A( K, K ) )
CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 )
*
-* Conjugate W(k)
+* (2) Conjugate column W(k)
*
CALL ZLACGV( N-K, W( K+1, K ), 1 )
END IF
+*
ELSE
*
* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
@@ -546,15 +807,68 @@
* where L(k) and L(k+1) are the k-th and (k+1)-th columns
* of L
*
+* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
+* block D(k:k+1,k:k+1) in columns k and k+1 of A.
+* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
+* block and not stored)
+* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
+* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
+* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
+*
IF( K.LT.N-1 ) THEN
*
-* Store L(k) and L(k+1) in columns k and k+1 of A
+* Factor out the columns of the inverse of 2-by-2 pivot
+* block D, so that each column contains 1, to reduce the
+* number of FLOPS when we multiply panel
+* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
+*
+* D**(-1) = ( d11 cj(d21) )**(-1) =
+* ( d21 d22 )
+*
+* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
+* ( (-d21) ( d11 ) )
+*
+* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
+*
+* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
+* ( ( -1 ) ( d11/conj(d21) ) )
+*
+* = 1/(|d21|**2) * 1/(D22*D11-1) *
+*
+* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( conj(D21)*( D11 ) D21*( -1 ) )
+* ( ( -1 ) ( D22 ) ),
+*
+* where D11 = d22/d21,
+* D22 = d11/conj(d21),
+* D21 = T/d21,
+* T = 1/(D22*D11-1).
+*
+* (NOTE: No need to check for division by ZERO,
+* since that was ensured earlier in pivot search:
+* (a) d21 != 0, since in 2x2 pivot case(4)
+* |d21| should be larger than |d11| and |d22|;
+* (b) (D22*D11 - 1) != 0, since from (a),
+* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
*
D21 = W( K+1, K )
D11 = W( K+1, K+1 ) / D21
D22 = W( K, K ) / DCONJG( D21 )
T = ONE / ( DBLE( D11*D22 )-ONE )
D21 = T / D21
+*
+* Update elements in columns A(k) and A(k+1) as
+* dot products of rows of ( W(k) W(k+1) ) and columns
+* of D**(-1)
+*
DO 80 J = K + 2, N
A( J, K ) = DCONJG( D21 )*
$ ( D11*W( J, K )-W( J, K+1 ) )
@@ -568,11 +882,13 @@
A( K+1, K ) = W( K+1, K )
A( K+1, K+1 ) = W( K+1, K+1 )
*
-* Conjugate W(k) and W(k+1)
+* (2) Conjugate columns W(k) and W(k+1)
*
CALL ZLACGV( N-K, W( K+1, K ), 1 )
CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 )
+*
END IF
+*
END IF
*
* Store details of the interchanges in IPIV
@@ -593,7 +909,7 @@
*
* Update the lower triangle of A22 (= A(k:n,k:n)) as
*
-* A22 := A22 - L21*D*L21' = A22 - L21*W'
+* A22 := A22 - L21*D*L21**H = A22 - L21*W**H
*
* computing blocks of NB columns at a time (note that conjg(W) is
* actually stored)
@@ -620,20 +936,28 @@
110 CONTINUE
*
* Put L21 in standard form by partially undoing the interchanges
-* in columns 1:k-1
+* of rows in columns 1:k-1 looping backwards from k-1 to 1
*
J = K - 1
120 CONTINUE
- JJ = J
- JP = IPIV( J )
- IF( JP.LT.0 ) THEN
- JP = -JP
+*
+* Undo the interchanges (if any) of rows JJ and JP at each
+* step J
+*
+* (Here, J is a diagonal index)
+ JJ = J
+ JP = IPIV( J )
+ IF( JP.LT.0 ) THEN
+ JP = -JP
+* (Here, J is a diagonal index)
+ J = J - 1
+ END IF
+* (NOTE: Here, J is used to determine row length. Length J
+* of the rows to swap back doesn't include diagonal element)
J = J - 1
- END IF
- J = J - 1
- IF( JP.NE.JJ .AND. J.GE.1 )
- $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
- IF( J.GE.1 )
+ IF( JP.NE.JJ .AND. J.GE.1 )
+ $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+ IF( J.GT.1 )
$ GO TO 120
*
* Set KB to the number of columns factorized