--- rpl/lapack/lapack/zlahef.f 2012/12/14 14:22:50 1.13
+++ rpl/lapack/lapack/zlahef.f 2023/08/07 08:39:29 1.20
@@ -1,25 +1,25 @@
-*> \brief \b ZLAHEF computes a partial factorization of a complex Hermitian indefinite matrix, using the diagonal pivoting method.
+*> \brief \b ZLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS).
*
* =========== DOCUMENTATION ===========
*
-* Online html documentation available at
-* http://www.netlib.org/lapack/explore-html/
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
-*> Download ZLAHEF + dependencies
-*>
-*> [TGZ]
-*>
-*> [ZIP]
-*>
+*> Download ZLAHEF + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
*> [TXT]
-*> \endhtmlonly
+*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
-*
+*
* .. Scalar Arguments ..
* CHARACTER UPLO
* INTEGER INFO, KB, LDA, LDW, N, NB
@@ -28,7 +28,7 @@
* INTEGER IPIV( * )
* COMPLEX*16 A( LDA, * ), W( LDW, * )
* ..
-*
+*
*
*> \par Purpose:
* =============
@@ -110,16 +110,26 @@
*> \verbatim
*> IPIV is INTEGER array, dimension (N)
*> Details of the interchanges and the block structure of D.
-*> If UPLO = 'U', only the last KB elements of IPIV are set;
-*> if UPLO = 'L', only the first KB elements are set.
*>
-*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
-*> interchanged and D(k,k) is a 1-by-1 diagonal block.
-*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
-*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
-*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) =
-*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
-*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*> If UPLO = 'U':
+*> Only the last KB elements of IPIV are set.
+*>
+*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*> interchanged and D(k,k) is a 1-by-1 diagonal block.
+*>
+*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns
+*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*> is a 2-by-2 diagonal block.
+*>
+*> If UPLO = 'L':
+*> Only the first KB elements of IPIV are set.
+*>
+*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*> interchanged and D(k,k) is a 1-by-1 diagonal block.
+*>
+*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns
+*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1)
+*> is a 2-by-2 diagonal block.
*> \endverbatim
*>
*> \param[out] W
@@ -145,22 +155,29 @@
* Authors:
* ========
*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date September 2012
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
*
*> \ingroup complex16HEcomputational
*
+*> \par Contributors:
+* ==================
+*>
+*> \verbatim
+*>
+*> December 2016, Igor Kozachenko,
+*> Computer Science Division,
+*> University of California, Berkeley
+*> \endverbatim
+*
* =====================================================================
SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
*
-* -- LAPACK computational routine (version 3.4.2) --
+* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-* September 2012
*
* .. Scalar Arguments ..
CHARACTER UPLO
@@ -231,6 +248,8 @@
IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
$ GO TO 30
*
+ KSTEP = 1
+*
* Copy column K of A to column KW of W and update it
*
CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 )
@@ -241,15 +260,14 @@
W( K, KW ) = DBLE( W( K, KW ) )
END IF
*
- KSTEP = 1
-*
* Determine rows and columns to be interchanged and whether
* a 1-by-1 or 2-by-2 pivot block will be used
*
ABSAKK = ABS( DBLE( W( K, KW ) ) )
*
* IMAX is the row-index of the largest off-diagonal element in
-* column K, and COLMAX is its absolute value
+* column K, and COLMAX is its absolute value.
+* Determine both COLMAX and IMAX.
*
IF( K.GT.1 ) THEN
IMAX = IZAMAX( K-1, W( 1, KW ), 1 )
@@ -260,13 +278,19 @@
*
IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
*
-* Column K is zero: set INFO and continue
+* Column K is zero or underflow: set INFO and continue
*
IF( INFO.EQ.0 )
$ INFO = K
KP = K
A( K, K ) = DBLE( A( K, K ) )
ELSE
+*
+* ============================================================
+*
+* BEGIN pivot search
+*
+* Case(1)
IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
*
* no interchange, use 1-by-1 pivot block
@@ -274,6 +298,9 @@
KP = K
ELSE
*
+* BEGIN pivot search along IMAX row
+*
+*
* Copy column IMAX to column KW-1 of W and update it
*
CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
@@ -289,7 +316,8 @@
END IF
*
* JMAX is the column-index of the largest off-diagonal
-* element in row IMAX, and ROWMAX is its absolute value
+* element in row IMAX, and ROWMAX is its absolute value.
+* Determine only ROWMAX.
*
JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
ROWMAX = CABS1( W( JMAX, KW-1 ) )
@@ -298,11 +326,14 @@
ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
END IF
*
+* Case(2)
IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
*
* no interchange, use 1-by-1 pivot block
*
KP = K
+*
+* Case(3)
ELSE IF( ABS( DBLE( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX )
$ THEN
*
@@ -311,9 +342,11 @@
*
KP = IMAX
*
-* copy column KW-1 of W to column KW
+* copy column KW-1 of W to column KW of W
*
CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+*
+* Case(4)
ELSE
*
* interchange rows and columns K-1 and IMAX, use 2-by-2
@@ -322,27 +355,48 @@
KP = IMAX
KSTEP = 2
END IF
+*
+*
+* END pivot search along IMAX row
+*
END IF
*
+* END pivot search
+*
+* ============================================================
+*
+* KK is the column of A where pivoting step stopped
+*
KK = K - KSTEP + 1
+*
+* KKW is the column of W which corresponds to column KK of A
+*
KKW = NB + KK - N
*
-* Updated column KP is already stored in column KKW of W
+* Interchange rows and columns KP and KK.
+* Updated column KP is already stored in column KKW of W.
*
IF( KP.NE.KK ) THEN
*
-* Copy non-updated column KK to column KP
+* Copy non-updated column KK to column KP of submatrix A
+* at step K. No need to copy element into column K
+* (or K and K-1 for 2-by-2 pivot) of A, since these columns
+* will be later overwritten.
*
A( KP, KP ) = DBLE( A( KK, KK ) )
CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
$ LDA )
CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA )
- CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+ IF( KP.GT.1 )
+ $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
*
-* Interchange rows KK and KP in last KK columns of A and W
+* Interchange rows KK and KP in last K+1 to N columns of A
+* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
+* later overwritten). Interchange rows KK and KP
+* in last KKW to NB columns of W.
*
- IF( KK.LT.N )
- $ CALL ZSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ),
+ IF( K.LT.N )
+ $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ),
$ LDA )
CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
$ LDW )
@@ -350,40 +404,108 @@
*
IF( KSTEP.EQ.1 ) THEN
*
-* 1-by-1 pivot block D(k): column KW of W now holds
+* 1-by-1 pivot block D(k): column kw of W now holds
*
-* W(k) = U(k)*D(k)
+* W(kw) = U(k)*D(k),
*
* where U(k) is the k-th column of U
*
-* Store U(k) in column k of A
-*
+* (1) Store subdiag. elements of column U(k)
+* and 1-by-1 block D(k) in column k of A.
+* (NOTE: Diagonal element U(k,k) is a UNIT element
+* and not stored)
+* A(k,k) := D(k,k) = W(k,kw)
+* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
+*
+* (NOTE: No need to use for Hermitian matrix
+* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
+* element D(k,k) from W (potentially saves only one load))
CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
- R1 = ONE / DBLE( A( K, K ) )
- CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+ IF( K.GT.1 ) THEN
*
-* Conjugate W(k)
+* (NOTE: No need to check if A(k,k) is NOT ZERO,
+* since that was ensured earlier in pivot search:
+* case A(k,k) = 0 falls into 2x2 pivot case(4))
+*
+ R1 = ONE / DBLE( A( K, K ) )
+ CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+*
+* (2) Conjugate column W(kw)
+*
+ CALL ZLACGV( K-1, W( 1, KW ), 1 )
+ END IF
*
- CALL ZLACGV( K-1, W( 1, KW ), 1 )
ELSE
*
-* 2-by-2 pivot block D(k): columns KW and KW-1 of W now
-* hold
+* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
*
-* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
*
* where U(k) and U(k-1) are the k-th and (k-1)-th columns
* of U
*
+* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
+* block D(k-1:k,k-1:k) in columns k-1 and k of A.
+* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
+* block and not stored)
+* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
+* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
+* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
+*
IF( K.GT.2 ) THEN
*
-* Store U(k) and U(k-1) in columns k and k-1 of A
+* Factor out the columns of the inverse of 2-by-2 pivot
+* block D, so that each column contains 1, to reduce the
+* number of FLOPS when we multiply panel
+* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
+*
+* D**(-1) = ( d11 cj(d21) )**(-1) =
+* ( d21 d22 )
+*
+* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
+* ( (-d21) ( d11 ) )
+*
+* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
+*
+* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
+* ( ( -1 ) ( d11/conj(d21) ) )
+*
+* = 1/(|d21|**2) * 1/(D22*D11-1) *
+*
+* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( conj(D21)*( D11 ) D21*( -1 ) )
+* ( ( -1 ) ( D22 ) ),
+*
+* where D11 = d22/d21,
+* D22 = d11/conj(d21),
+* D21 = T/d21,
+* T = 1/(D22*D11-1).
+*
+* (NOTE: No need to check for division by ZERO,
+* since that was ensured earlier in pivot search:
+* (a) d21 != 0, since in 2x2 pivot case(4)
+* |d21| should be larger than |d11| and |d22|;
+* (b) (D22*D11 - 1) != 0, since from (a),
+* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
*
D21 = W( K-1, KW )
D11 = W( K, KW ) / DCONJG( D21 )
D22 = W( K-1, KW-1 ) / D21
T = ONE / ( DBLE( D11*D22 )-ONE )
D21 = T / D21
+*
+* Update elements in columns A(k-1) and A(k) as
+* dot products of rows of ( W(kw-1) W(kw) ) and columns
+* of D**(-1)
+*
DO 20 J = 1, K - 2
A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
A( J, K ) = DCONJG( D21 )*
@@ -397,11 +519,13 @@
A( K-1, K ) = W( K-1, KW )
A( K, K ) = W( K, KW )
*
-* Conjugate W(k) and W(k-1)
+* (2) Conjugate columns W(kw) and W(kw-1)
*
CALL ZLACGV( K-1, W( 1, KW ), 1 )
CALL ZLACGV( K-2, W( 1, KW-1 ), 1 )
+*
END IF
+*
END IF
*
* Store details of the interchanges in IPIV
@@ -448,20 +572,28 @@
50 CONTINUE
*
* Put U12 in standard form by partially undoing the interchanges
-* in columns k+1:n
+* in columns k+1:n looping backwards from k+1 to n
*
J = K + 1
60 CONTINUE
- JJ = J
- JP = IPIV( J )
- IF( JP.LT.0 ) THEN
- JP = -JP
+*
+* Undo the interchanges (if any) of rows JJ and JP at each
+* step J
+*
+* (Here, J is a diagonal index)
+ JJ = J
+ JP = IPIV( J )
+ IF( JP.LT.0 ) THEN
+ JP = -JP
+* (Here, J is a diagonal index)
+ J = J + 1
+ END IF
+* (NOTE: Here, J is used to determine row length. Length N-J+1
+* of the rows to swap back doesn't include diagonal element)
J = J + 1
- END IF
- J = J + 1
- IF( JP.NE.JJ .AND. J.LE.N )
- $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
- IF( J.LE.N )
+ IF( JP.NE.JJ .AND. J.LE.N )
+ $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+ IF( J.LT.N )
$ GO TO 60
*
* Set KB to the number of columns factorized
@@ -484,6 +616,8 @@
IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
$ GO TO 90
*
+ KSTEP = 1
+*
* Copy column K of A to column K of W and update it
*
W( K, K ) = DBLE( A( K, K ) )
@@ -493,15 +627,14 @@
$ W( K, 1 ), LDW, CONE, W( K, K ), 1 )
W( K, K ) = DBLE( W( K, K ) )
*
- KSTEP = 1
-*
* Determine rows and columns to be interchanged and whether
* a 1-by-1 or 2-by-2 pivot block will be used
*
ABSAKK = ABS( DBLE( W( K, K ) ) )
*
* IMAX is the row-index of the largest off-diagonal element in
-* column K, and COLMAX is its absolute value
+* column K, and COLMAX is its absolute value.
+* Determine both COLMAX and IMAX.
*
IF( K.LT.N ) THEN
IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 )
@@ -512,13 +645,19 @@
*
IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
*
-* Column K is zero: set INFO and continue
+* Column K is zero or underflow: set INFO and continue
*
IF( INFO.EQ.0 )
$ INFO = K
KP = K
A( K, K ) = DBLE( A( K, K ) )
ELSE
+*
+* ============================================================
+*
+* BEGIN pivot search
+*
+* Case(1)
IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
*
* no interchange, use 1-by-1 pivot block
@@ -526,6 +665,9 @@
KP = K
ELSE
*
+* BEGIN pivot search along IMAX row
+*
+*
* Copy column IMAX to column K+1 of W and update it
*
CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
@@ -540,7 +682,8 @@
W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) )
*
* JMAX is the column-index of the largest off-diagonal
-* element in row IMAX, and ROWMAX is its absolute value
+* element in row IMAX, and ROWMAX is its absolute value.
+* Determine only ROWMAX.
*
JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 )
ROWMAX = CABS1( W( JMAX, K+1 ) )
@@ -549,11 +692,14 @@
ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
END IF
*
+* Case(2)
IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
*
* no interchange, use 1-by-1 pivot block
*
KP = K
+*
+* Case(3)
ELSE IF( ABS( DBLE( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX )
$ THEN
*
@@ -562,9 +708,11 @@
*
KP = IMAX
*
-* copy column K+1 of W to column K
+* copy column K+1 of W to column K of W
*
CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+*
+* Case(4)
ELSE
*
* interchange rows and columns K+1 and IMAX, use 2-by-2
@@ -573,15 +721,29 @@
KP = IMAX
KSTEP = 2
END IF
+*
+*
+* END pivot search along IMAX row
+*
END IF
*
+* END pivot search
+*
+* ============================================================
+*
+* KK is the column of A where pivoting step stopped
+*
KK = K + KSTEP - 1
*
-* Updated column KP is already stored in column KK of W
+* Interchange rows and columns KP and KK.
+* Updated column KP is already stored in column KK of W.
*
IF( KP.NE.KK ) THEN
*
-* Copy non-updated column KK to column KP
+* Copy non-updated column KK to column KP of submatrix A
+* at step K. No need to copy element into column K
+* (or K and K+1 for 2-by-2 pivot) of A, since these columns
+* will be later overwritten.
*
A( KP, KP ) = DBLE( A( KK, KK ) )
CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
@@ -590,9 +752,13 @@
IF( KP.LT.N )
$ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
*
-* Interchange rows KK and KP in first KK columns of A and W
+* Interchange rows KK and KP in first K-1 columns of A
+* (columns K (or K and K+1 for 2-by-2 pivot) of A will be
+* later overwritten). Interchange rows KK and KP
+* in first KK columns of W.
*
- CALL ZSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+ IF( K.GT.1 )
+ $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
END IF
*
@@ -600,21 +766,35 @@
*
* 1-by-1 pivot block D(k): column k of W now holds
*
-* W(k) = L(k)*D(k)
+* W(k) = L(k)*D(k),
*
* where L(k) is the k-th column of L
*
-* Store L(k) in column k of A
-*
+* (1) Store subdiag. elements of column L(k)
+* and 1-by-1 block D(k) in column k of A.
+* (NOTE: Diagonal element L(k,k) is a UNIT element
+* and not stored)
+* A(k,k) := D(k,k) = W(k,k)
+* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
+*
+* (NOTE: No need to use for Hermitian matrix
+* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
+* element D(k,k) from W (potentially saves only one load))
CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
IF( K.LT.N ) THEN
+*
+* (NOTE: No need to check if A(k,k) is NOT ZERO,
+* since that was ensured earlier in pivot search:
+* case A(k,k) = 0 falls into 2x2 pivot case(4))
+*
R1 = ONE / DBLE( A( K, K ) )
CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 )
*
-* Conjugate W(k)
+* (2) Conjugate column W(k)
*
CALL ZLACGV( N-K, W( K+1, K ), 1 )
END IF
+*
ELSE
*
* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
@@ -624,15 +804,68 @@
* where L(k) and L(k+1) are the k-th and (k+1)-th columns
* of L
*
+* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
+* block D(k:k+1,k:k+1) in columns k and k+1 of A.
+* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
+* block and not stored)
+* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
+* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
+* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
+*
IF( K.LT.N-1 ) THEN
*
-* Store L(k) and L(k+1) in columns k and k+1 of A
+* Factor out the columns of the inverse of 2-by-2 pivot
+* block D, so that each column contains 1, to reduce the
+* number of FLOPS when we multiply panel
+* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
+*
+* D**(-1) = ( d11 cj(d21) )**(-1) =
+* ( d21 d22 )
+*
+* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
+* ( (-d21) ( d11 ) )
+*
+* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
+*
+* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
+* ( ( -1 ) ( d11/conj(d21) ) )
+*
+* = 1/(|d21|**2) * 1/(D22*D11-1) *
+*
+* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
+* ( ( -1 ) ( D22 ) )
+*
+* = ( conj(D21)*( D11 ) D21*( -1 ) )
+* ( ( -1 ) ( D22 ) ),
+*
+* where D11 = d22/d21,
+* D22 = d11/conj(d21),
+* D21 = T/d21,
+* T = 1/(D22*D11-1).
+*
+* (NOTE: No need to check for division by ZERO,
+* since that was ensured earlier in pivot search:
+* (a) d21 != 0, since in 2x2 pivot case(4)
+* |d21| should be larger than |d11| and |d22|;
+* (b) (D22*D11 - 1) != 0, since from (a),
+* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
*
D21 = W( K+1, K )
D11 = W( K+1, K+1 ) / D21
D22 = W( K, K ) / DCONJG( D21 )
T = ONE / ( DBLE( D11*D22 )-ONE )
D21 = T / D21
+*
+* Update elements in columns A(k) and A(k+1) as
+* dot products of rows of ( W(k) W(k+1) ) and columns
+* of D**(-1)
+*
DO 80 J = K + 2, N
A( J, K ) = DCONJG( D21 )*
$ ( D11*W( J, K )-W( J, K+1 ) )
@@ -646,11 +879,13 @@
A( K+1, K ) = W( K+1, K )
A( K+1, K+1 ) = W( K+1, K+1 )
*
-* Conjugate W(k) and W(k+1)
+* (2) Conjugate columns W(k) and W(k+1)
*
CALL ZLACGV( N-K, W( K+1, K ), 1 )
CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 )
+*
END IF
+*
END IF
*
* Store details of the interchanges in IPIV
@@ -698,20 +933,28 @@
110 CONTINUE
*
* Put L21 in standard form by partially undoing the interchanges
-* in columns 1:k-1
+* of rows in columns 1:k-1 looping backwards from k-1 to 1
*
J = K - 1
120 CONTINUE
- JJ = J
- JP = IPIV( J )
- IF( JP.LT.0 ) THEN
- JP = -JP
+*
+* Undo the interchanges (if any) of rows JJ and JP at each
+* step J
+*
+* (Here, J is a diagonal index)
+ JJ = J
+ JP = IPIV( J )
+ IF( JP.LT.0 ) THEN
+ JP = -JP
+* (Here, J is a diagonal index)
+ J = J - 1
+ END IF
+* (NOTE: Here, J is used to determine row length. Length J
+* of the rows to swap back doesn't include diagonal element)
J = J - 1
- END IF
- J = J - 1
- IF( JP.NE.JJ .AND. J.GE.1 )
- $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
- IF( J.GE.1 )
+ IF( JP.NE.JJ .AND. J.GE.1 )
+ $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+ IF( J.GT.1 )
$ GO TO 120
*
* Set KB to the number of columns factorized