--- rpl/lapack/lapack/dlasq2.f 2010/01/26 15:22:46 1.1 +++ rpl/lapack/lapack/dlasq2.f 2023/08/07 08:38:59 1.18 @@ -1,12 +1,116 @@ - SUBROUTINE DLASQ2( N, Z, INFO ) +*> \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== * -* -- LAPACK routine (version 3.2) -- +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ * -* -- Contributed by Osni Marques of the Lawrence Berkeley National -- -* -- Laboratory and Beresford Parlett of the Univ. of California at -- -* -- Berkeley -- -* -- November 2008 -- +*> \htmlonly +*> Download DLASQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ2( N, Z, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ2 computes all the eigenvalues of the symmetric positive +*> definite tridiagonal matrix associated with the qd array Z to high +*> relative accuracy are computed to high relative accuracy, in the +*> absence of denormalization, underflow and overflow. +*> +*> To see the relation of Z to the tridiagonal matrix, let L be a +*> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +*> let U be an upper bidiagonal matrix with 1's above and diagonal +*> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +*> symmetric tridiagonal to which it is similar. +*> +*> Note : DLASQ2 defines a logical variable, IEEE, which is true +*> on machines which follow ieee-754 floating-point standard in their +*> handling of infinities and NaNs, and false otherwise. This variable +*> is passed to DLASQ3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> On entry Z holds the qd array. On exit, entries 1 to N hold +*> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +*> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +*> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +*> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +*> shifts that failed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if the i-th argument is a scalar and had an illegal +*> value, then INFO = -i, if the i-th argument is an +*> array and the j-entry had an illegal value, then +*> INFO = -(i*100+j) +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop). On exit Z holds +*> a qd array with the same eigenvalues as the given Z. +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Local Variables: I0:N0 defines a current unreduced segment of Z. +*> The shifts are accumulated in SIGMA. Iteration count is in ITER. +*> Ping-pong is controlled by PP (alternates between 0 and 1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASQ2( N, Z, INFO ) * +* -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * @@ -17,58 +121,6 @@ DOUBLE PRECISION Z( * ) * .. * -* Purpose -* ======= -* -* DLASQ2 computes all the eigenvalues of the symmetric positive -* definite tridiagonal matrix associated with the qd array Z to high -* relative accuracy are computed to high relative accuracy, in the -* absence of denormalization, underflow and overflow. -* -* To see the relation of Z to the tridiagonal matrix, let L be a -* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and -* let U be an upper bidiagonal matrix with 1's above and diagonal -* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the -* symmetric tridiagonal to which it is similar. -* -* Note : DLASQ2 defines a logical variable, IEEE, which is true -* on machines which follow ieee-754 floating-point standard in their -* handling of infinities and NaNs, and false otherwise. This variable -* is passed to DLASQ3. -* -* Arguments -* ========= -* -* N (input) INTEGER -* The number of rows and columns in the matrix. N >= 0. -* -* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) -* On entry Z holds the qd array. On exit, entries 1 to N hold -* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the -* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If -* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) -* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of -* shifts that failed. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if the i-th argument is a scalar and had an illegal -* value, then INFO = -i, if the i-th argument is an -* array and the j-entry had an illegal value, then -* INFO = -(i*100+j) -* > 0: the algorithm failed -* = 1, a split was marked by a positive value in E -* = 2, current block of Z not diagonalized after 30*N -* iterations (in inner while loop) -* = 3, termination criterion of outer while loop not met -* (program created more than N unreduced blocks) -* -* Further Details -* =============== -* Local Variables: I0:N0 defines a current unreduced segment of Z. -* The shifts are accumulated in SIGMA. Iteration count is in ITER. -* Ping-pong is controlled by PP (alternates between 0 and 1). -* * ===================================================================== * * .. Parameters .. @@ -80,12 +132,13 @@ * .. * .. Local Scalars .. LOGICAL IEEE - INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, - $ KMIN, N0, NBIG, NDIV, NFAIL, PP, SPLT, TTYPE + INTEGER I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, + $ K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT, + $ TTYPE DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN, $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX, $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL, - $ TOL2, TRACE, ZMAX + $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ * .. * .. External Subroutines .. EXTERNAL DLASQ3, DLASRT, XERBLA @@ -99,7 +152,7 @@ INTRINSIC ABS, DBLE, MAX, MIN, SQRT * .. * .. Executable Statements .. -* +* * Test the input arguments. * (in case DLASQ2 is not called by DLASQ1) * @@ -128,10 +181,18 @@ * * 2-by-2 case. * - IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN - INFO = -2 + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 CALL XERBLA( 'DLASQ2', 2 ) RETURN + ELSE IF( Z( 2 ).LT.ZERO ) THEN + INFO = -202 + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).LT.ZERO ) THEN + INFO = -203 + CALL XERBLA( 'DLASQ2', 2 ) + RETURN ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN D = Z( 3 ) Z( 3 ) = Z( 1 ) @@ -139,7 +200,7 @@ END IF Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN - T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) S = Z( 3 )*( Z( 2 ) / T ) IF( S.LE.T ) THEN S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) @@ -208,19 +269,18 @@ Z( 2*N-1 ) = ZERO RETURN END IF -* +* * Check whether the machine is IEEE conformable. -* - IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. - $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 -* +* + IEEE = ( ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 ) +* * Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). * DO 30 K = 2*N, 2, -2 - Z( 2*K ) = ZERO - Z( 2*K-1 ) = Z( K ) - Z( 2*K-2 ) = ZERO - Z( 2*K-3 ) = Z( K-1 ) + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) 30 CONTINUE * I0 = 1 @@ -277,7 +337,7 @@ D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) END IF EMIN = MIN( EMIN, Z( I4-2*PP ) ) - 60 CONTINUE + 60 CONTINUE Z( 4*N0-PP-2 ) = D * * Now find qmax. @@ -308,14 +368,14 @@ NDIV = 2*( N0-I0 ) * DO 160 IWHILA = 1, N + 1 - IF( N0.LT.1 ) + IF( N0.LT.1 ) $ GO TO 170 * -* While array unfinished do +* While array unfinished do * * E(N0) holds the value of SIGMA when submatrix in I0:N0 * splits from the rest of the array, but is negated. -* +* DESIG = ZERO IF( N0.EQ.N ) THEN SIGMA = ZERO @@ -330,7 +390,7 @@ * Find last unreduced submatrix's top index I0, find QMAX and * EMIN. Find Gershgorin-type bound if Q's much greater than E's. * - EMAX = ZERO + EMAX = ZERO IF( N0.GT.I0 ) THEN EMIN = ABS( Z( 4*N0-5 ) ) ELSE @@ -348,7 +408,7 @@ QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) EMIN = MIN( EMIN, Z( I4-5 ) ) 90 CONTINUE - I4 = 4 + I4 = 4 * 100 CONTINUE I0 = I4 / 4 @@ -365,7 +425,7 @@ KMIN = ( I4+3 )/4 END IF 110 CONTINUE - IF( (KMIN-I0)*2.LT.N0-KMIN .AND. + IF( (KMIN-I0)*2.LT.N0-KMIN .AND. $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN IPN4 = 4*( I0+N0 ) PP = 2 @@ -390,15 +450,15 @@ * DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) * -* Now I0:N0 is unreduced. +* Now I0:N0 is unreduced. * PP = 0 for ping, PP = 1 for pong. * PP = 2 indicates that flipping was applied to the Z array and -* and that the tests for deflation upon entry in DLASQ3 +* and that the tests for deflation upon entry in DLASQ3 * should not be performed. * - NBIG = 30*( N0-I0+1 ) + NBIG = 100*( N0-I0+1 ) DO 140 IWHILB = 1, NBIG - IF( I0.GT.N0 ) + IF( I0.GT.N0 ) $ GO TO 150 * * While submatrix unfinished take a good dqds step. @@ -441,6 +501,47 @@ 140 CONTINUE * INFO = 2 +* +* Maximum number of iterations exceeded, restore the shift +* SIGMA and place the new d's and e's in a qd array. +* This might need to be done for several blocks +* + I1 = I0 + N1 = N0 + 145 CONTINUE + TEMPQ = Z( 4*I0-3 ) + Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA + DO K = I0+1, N0 + TEMPE = Z( 4*K-5 ) + Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 )) + TEMPQ = Z( 4*K-3 ) + Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 ) + END DO +* +* Prepare to do this on the previous block if there is one +* + IF( I1.GT.1 ) THEN + N1 = I1-1 + DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) ) + I1 = I1 - 1 + END DO + SIGMA = -Z(4*N1-1) + GO TO 145 + END IF + + DO K = 1, N + Z( 2*K-1 ) = Z( 4*K-3 ) +* +* Only the block 1..N0 is unfinished. The rest of the e's +* must be essentially zero, although sometimes other data +* has been stored in them. +* + IF( K.LT.N0 ) THEN + Z( 2*K ) = Z( 4*K-1 ) + ELSE + Z( 2*K ) = 0 + END IF + END DO RETURN * * end IWHILB @@ -452,16 +553,16 @@ INFO = 3 RETURN * -* end IWHILA +* end IWHILA * 170 CONTINUE -* +* * Move q's to the front. -* +* DO 180 K = 2, N Z( K ) = Z( 4*K-3 ) 180 CONTINUE -* +* * Sort and compute sum of eigenvalues. * CALL DLASRT( 'D', N, Z, IINFO ) @@ -473,7 +574,7 @@ * * Store trace, sum(eigenvalues) and information on performance. * - Z( 2*N+1 ) = TRACE + Z( 2*N+1 ) = TRACE Z( 2*N+2 ) = E Z( 2*N+3 ) = DBLE( ITER ) Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )