--- rpl/lapack/lapack/iparmq.f 2010/01/26 15:22:46 1.1 +++ rpl/lapack/lapack/iparmq.f 2023/08/07 08:39:14 1.21 @@ -1,173 +1,256 @@ +*> \brief \b IPARMQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IPARMQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, ISPEC, LWORK, N +* CHARACTER NAME*( * ), OPTS*( * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This program sets problem and machine dependent parameters +*> useful for xHSEQR and related subroutines for eigenvalue +*> problems. It is called whenever +*> ILAENV is called with 12 <= ISPEC <= 17 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> ISPEC specifies which tunable parameter IPARMQ should +*> return. +*> +*> ISPEC=12: (INMIN) Matrices of order nmin or less +*> are sent directly to xLAHQR, the implicit +*> double shift QR algorithm. NMIN must be +*> at least 11. +*> +*> ISPEC=13: (INWIN) Size of the deflation window. +*> This is best set greater than or equal to +*> the number of simultaneous shifts NS. +*> Larger matrices benefit from larger deflation +*> windows. +*> +*> ISPEC=14: (INIBL) Determines when to stop nibbling and +*> invest in an (expensive) multi-shift QR sweep. +*> If the aggressive early deflation subroutine +*> finds LD converged eigenvalues from an order +*> NW deflation window and LD > (NW*NIBBLE)/100, +*> then the next QR sweep is skipped and early +*> deflation is applied immediately to the +*> remaining active diagonal block. Setting +*> IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a +*> multi-shift QR sweep whenever early deflation +*> finds a converged eigenvalue. 
Setting +*> IPARMQ(ISPEC=14) greater than or equal to 100 +*> prevents TTQRE from skipping a multi-shift +*> QR sweep. +*> +*> ISPEC=15: (NSHFTS) The number of simultaneous shifts in +*> a multi-shift QR iteration. +*> +*> ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the +*> following meanings. +*> 0: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are not +*> accumulated when updating the +*> far-from-diagonal matrix entries. +*> 1: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are +*> accumulated, and matrix-matrix +*> multiplication is used to update the +*> far-from-diagonal matrix entries. +*> 2: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are +*> accumulated, and 2-by-2 block structure +*> is exploited during matrix-matrix +*> multiplies. +*> (If xTRMM is slower than xGEMM, then +*> IPARMQ(ISPEC=16)=1 may be more efficient than +*> IPARMQ(ISPEC=16)=2 despite the greater level of +*> arithmetic work implied by the latter choice.) +*> +*> ISPEC=17: (ICOST) An estimate of the relative cost of flops +*> within the near-the-diagonal shift chase compared +*> to flops within the BLAS calls of a QZ sweep. +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is CHARACTER string +*> Name of the calling subroutine +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER string +*> This is a concatenation of the string arguments to +*> TTQRE. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H. 
+*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The amount of workspace available. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Little is known about how best to choose these parameters. +*> It is possible to use different values of the parameters +*> for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. +*> +*> It is probably best to choose different parameters for +*> different matrices and different parameters at different +*> times during the iteration, but this has not been +*> implemented --- yet. +*> +*> +*> The best choices of most of the parameters depend +*> in an ill-understood way on the relative execution +*> rate of xLAQR3 and xLAQR5 and on the nature of each +*> particular eigenvalue problem. Experiment may be the +*> only practical way to determine which choices are most +*> effective. +*> +*> Following is a list of default values supplied by IPARMQ. +*> These defaults may be adjusted in order to attain better +*> performance in any particular computational environment. +*> +*> IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> IPARMQ(ISPEC=13) Recommended deflation window size. +*> This depends on ILO, IHI and NS, the +*> number of simultaneous shifts returned +*> by IPARMQ(ISPEC=15). The default for +*> (IHI-ILO+1) <= 500 is NS. The default +*> for (IHI-ILO+1) > 500 is 3*NS/2. +*> +*> IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. 
+*> +*> IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in +*> a multi-shift QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 0 30 NS = 2+ +*> 30 60 NS = 4+ +*> 60 150 NS = 10 +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default matrices of this order are +*> passed to the implicit double shift routine +*> xLAHQR. See IPARMQ(ISPEC=12) above. These +*> values of NS are used only in case of a rare +*> xLAHQR failure. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function increasing from 10 to 64. +*> +*> IPARMQ(ISPEC=16) Select structured matrix multiply. +*> (See ISPEC=16 above for details.) +*> Default: 0, 1 or 2 (chosen from NAME, NH and NS). +*> +*> IPARMQ(ISPEC=17) Relative cost heuristic for blocksize selection. +*> Expressed as a percentage. +*> Default: 10. +*> \endverbatim +*> +* ===================================================================== INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) * -* -- LAPACK auxiliary routine (version 3.2) -- +* -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2006 -* +* * .. Scalar Arguments .. INTEGER IHI, ILO, ISPEC, LWORK, N CHARACTER NAME*( * ), OPTS*( * ) * -* Purpose -* ======= -* -* This program sets problem and machine dependent parameters -* useful for xHSEQR and its subroutines. It is called whenever -* ILAENV is called with 12 <= ISPEC <= 16 -* -* Arguments -* ========= -* -* ISPEC (input) integer scalar -* ISPEC specifies which tunable parameter IPARMQ should -* return. -* -* ISPEC=12: (INMIN) Matrices of order nmin or less -* are sent directly to xLAHQR, the implicit -* double shift QR algorithm. NMIN must be -* at least 11. -* -* ISPEC=13: (INWIN) Size of the deflation window. 
-* This is best set greater than or equal to -* the number of simultaneous shifts NS. -* Larger matrices benefit from larger deflation -* windows. -* -* ISPEC=14: (INIBL) Determines when to stop nibbling and -* invest in an (expensive) multi-shift QR sweep. -* If the aggressive early deflation subroutine -* finds LD converged eigenvalues from an order -* NW deflation window and LD.GT.(NW*NIBBLE)/100, -* then the next QR sweep is skipped and early -* deflation is applied immediately to the -* remaining active diagonal block. Setting -* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a -* multi-shift QR sweep whenever early deflation -* finds a converged eigenvalue. Setting -* IPARMQ(ISPEC=14) greater than or equal to 100 -* prevents TTQRE from skipping a multi-shift -* QR sweep. -* -* ISPEC=15: (NSHFTS) The number of simultaneous shifts in -* a multi-shift QR iteration. -* -* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the -* following meanings. -* 0: During the multi-shift QR sweep, -* xLAQR5 does not accumulate reflections and -* does not use matrix-matrix multiply to -* update the far-from-diagonal matrix -* entries. -* 1: During the multi-shift QR sweep, -* xLAQR5 and/or xLAQRaccumulates reflections and uses -* matrix-matrix multiply to update the -* far-from-diagonal matrix entries. -* 2: During the multi-shift QR sweep. -* xLAQR5 accumulates reflections and takes -* advantage of 2-by-2 block structure during -* matrix-matrix multiplies. -* (If xTRMM is slower than xGEMM, then -* IPARMQ(ISPEC=16)=1 may be more efficient than -* IPARMQ(ISPEC=16)=2 despite the greater level of -* arithmetic work implied by the latter choice.) -* -* NAME (input) character string -* Name of the calling subroutine -* -* OPTS (input) character string -* This is a concatenation of the string arguments to -* TTQRE. -* -* N (input) integer scalar -* N is the order of the Hessenberg matrix H. 
-* -* ILO (input) INTEGER -* IHI (input) INTEGER -* It is assumed that H is already upper triangular -* in rows and columns 1:ILO-1 and IHI+1:N. -* -* LWORK (input) integer scalar -* The amount of workspace available. -* -* Further Details -* =============== -* -* Little is known about how best to choose these parameters. -* It is possible to use different values of the parameters -* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. -* -* It is probably best to choose different parameters for -* different matrices and different parameters at different -* times during the iteration, but this has not been -* implemented --- yet. -* -* -* The best choices of most of the parameters depend -* in an ill-understood way on the relative execution -* rate of xLAQR3 and xLAQR5 and on the nature of each -* particular eigenvalue problem. Experiment may be the -* only practical way to determine which choices are most -* effective. -* -* Following is a list of default values supplied by IPARMQ. -* These defaults may be adjusted in order to attain better -* performance in any particular computational environment. -* -* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. -* Default: 75. (Must be at least 11.) -* -* IPARMQ(ISPEC=13) Recommended deflation window size. -* This depends on ILO, IHI and NS, the -* number of simultaneous shifts returned -* by IPARMQ(ISPEC=15). The default for -* (IHI-ILO+1).LE.500 is NS. The default -* for (IHI-ILO+1).GT.500 is 3*NS/2. -* -* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. -* -* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. -* a multi-shift QR iteration. -* -* If IHI-ILO+1 is ... -* -* greater than ...but less ... the -* or equal to ... than default is -* -* 0 30 NS = 2+ -* 30 60 NS = 4+ -* 60 150 NS = 10 -* 150 590 NS = ** -* 590 3000 NS = 64 -* 3000 6000 NS = 128 -* 6000 infinity NS = 256 -* -* (+) By default matrices of this order are -* passed to the implicit double shift routine -* xLAHQR. See IPARMQ(ISPEC=12) above. 
These -* values of NS are used only in case of a rare -* xLAHQR failure. -* -* (**) The asterisks (**) indicate an ad-hoc -* function increasing from 10 to 64. -* -* IPARMQ(ISPEC=16) Select structured matrix multiply. -* (See ISPEC=16 above for details.) -* Default: 3. -* -* ================================================================ +* ================================================================ * .. Parameters .. - INTEGER INMIN, INWIN, INIBL, ISHFTS, IACC22 + INTEGER INMIN, INWIN, INIBL, ISHFTS, IACC22, ICOST PARAMETER ( INMIN = 12, INWIN = 13, INIBL = 14, - $ ISHFTS = 15, IACC22 = 16 ) - INTEGER NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP + $ ISHFTS = 15, IACC22 = 16, ICOST = 17 ) + INTEGER NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP, RCOST PARAMETER ( NMIN = 75, K22MIN = 14, KACMIN = 14, - $ NIBBLE = 14, KNWSWP = 500 ) + $ NIBBLE = 14, KNWSWP = 500, RCOST = 10 ) REAL TWO PARAMETER ( TWO = 2.0 ) * .. * .. Local Scalars .. INTEGER NH, NS + INTEGER I, IC, IZ + CHARACTER SUBNAM*6 * .. * .. Intrinsic Functions .. INTRINSIC LOG, MAX, MOD, NINT, REAL @@ -237,12 +320,81 @@ * . by making this choice dependent also upon the * . NH=IHI-ILO+1. * +* +* Convert NAME to upper case if the first character is lower case. +* IPARMQ = 0 - IF( NS.GE.KACMIN ) - $ IPARMQ = 1 - IF( NS.GE.K22MIN ) - $ IPARMQ = 2 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + END DO + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. 
+ $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + END DO + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + END DO + END IF + END IF +* + IF( SUBNAM( 2:6 ).EQ.'GGHRD' .OR. + $ SUBNAM( 2:6 ).EQ.'GGHD3' ) THEN + IPARMQ = 1 + IF( NH.GE.K22MIN ) + $ IPARMQ = 2 + ELSE IF ( SUBNAM( 4:6 ).EQ.'EXC' ) THEN + IF( NH.GE.KACMIN ) + $ IPARMQ = 1 + IF( NH.GE.K22MIN ) + $ IPARMQ = 2 + ELSE IF ( SUBNAM( 2:6 ).EQ.'HSEQR' .OR. + $ SUBNAM( 2:5 ).EQ.'LAQR' ) THEN + IF( NS.GE.KACMIN ) + $ IPARMQ = 1 + IF( NS.GE.K22MIN ) + $ IPARMQ = 2 + END IF +* + ELSE IF( ISPEC.EQ.ICOST ) THEN +* +* === Relative cost of near-the-diagonal chase vs +* BLAS updates === * + IPARMQ = RCOST ELSE * ===== invalid value of ispec ===== IPARMQ = -1