File:  [local] / rpl / lapack / lapack / iparmq.f
Revision 1.21: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:14 2023 UTC (8 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD
Première mise à jour de lapack et blas.

    1: *> \brief \b IPARMQ
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at
    6: *            http://www.netlib.org/lapack/explore-html/
    7: *
    8: *> \htmlonly
    9: *> Download IPARMQ + dependencies
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparmq.f">
   11: *> [TGZ]</a>
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparmq.f">
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparmq.f">
   15: *> [TXT]</a>
   16: *> \endhtmlonly
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
   22: *
   23: *       .. Scalar Arguments ..
   24: *       INTEGER            IHI, ILO, ISPEC, LWORK, N
   25: *       CHARACTER          NAME*( * ), OPTS*( * )
   26: *
   27: *
   28: *> \par Purpose:
   29: *  =============
   30: *>
   31: *> \verbatim
   32: *>
   33: *>      This program sets problem and machine dependent parameters
   34: *>      useful for xHSEQR and related subroutines for eigenvalue
   35: *>      problems. It is called whenever
   36: *>      IPARMQ is called with 12 <= ISPEC <= 16
   37: *> \endverbatim
   38: *
   39: *  Arguments:
   40: *  ==========
   41: *
   42: *> \param[in] ISPEC
   43: *> \verbatim
   44: *>          ISPEC is INTEGER
   45: *>              ISPEC specifies which tunable parameter IPARMQ should
   46: *>              return.
   47: *>
   48: *>              ISPEC=12: (INMIN)  Matrices of order nmin or less
   49: *>                        are sent directly to xLAHQR, the implicit
   50: *>                        double shift QR algorithm.  NMIN must be
   51: *>                        at least 11.
   52: *>
   53: *>              ISPEC=13: (INWIN)  Size of the deflation window.
   54: *>                        This is best set greater than or equal to
   55: *>                        the number of simultaneous shifts NS.
   56: *>                        Larger matrices benefit from larger deflation
   57: *>                        windows.
   58: *>
   59: *>              ISPEC=14: (INIBL) Determines when to stop nibbling and
   60: *>                        invest in an (expensive) multi-shift QR sweep.
   61: *>                        If the aggressive early deflation subroutine
   62: *>                        finds LD converged eigenvalues from an order
   63: *>                        NW deflation window and LD > (NW*NIBBLE)/100,
   64: *>                        then the next QR sweep is skipped and early
   65: *>                        deflation is applied immediately to the
   66: *>                        remaining active diagonal block.  Setting
   67: *>                        IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a
   68: *>                        multi-shift QR sweep whenever early deflation
   69: *>                        finds a converged eigenvalue.  Setting
   70: *>                        IPARMQ(ISPEC=14) greater than or equal to 100
   71: *>                        prevents TTQRE from skipping a multi-shift
   72: *>                        QR sweep.
   73: *>
   74: *>              ISPEC=15: (NSHFTS) The number of simultaneous shifts in
   75: *>                        a multi-shift QR iteration.
   76: *>
   77: *>              ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the
   78: *>                        following meanings.
   79: *>                        0:  During the multi-shift QR/QZ sweep,
   80: *>                            blocked eigenvalue reordering, blocked
   81: *>                            Hessenberg-triangular reduction,
   82: *>                            reflections and/or rotations are not
   83: *>                            accumulated when updating the
   84: *>                            far-from-diagonal matrix entries.
   85: *>                        1:  During the multi-shift QR/QZ sweep,
   86: *>                            blocked eigenvalue reordering, blocked
   87: *>                            Hessenberg-triangular reduction,
   88: *>                            reflections and/or rotations are
   89: *>                            accumulated, and matrix-matrix
   90: *>                            multiplication is used to update the
   91: *>                            far-from-diagonal matrix entries.
   92: *>                        2:  During the multi-shift QR/QZ sweep,
   93: *>                            blocked eigenvalue reordering, blocked
   94: *>                            Hessenberg-triangular reduction,
   95: *>                            reflections and/or rotations are
   96: *>                            accumulated, and 2-by-2 block structure
   97: *>                            is exploited during matrix-matrix
   98: *>                            multiplies.
   99: *>                        (If xTRMM is slower than xGEMM, then
  100: *>                        IPARMQ(ISPEC=16)=1 may be more efficient than
  101: *>                        IPARMQ(ISPEC=16)=2 despite the greater level of
  102: *>                        arithmetic work implied by the latter choice.)
  103: *>
  104: *>              ISPEC=17: (ICOST) An estimate of the relative cost of flops
  105: *>                        within the near-the-diagonal shift chase compared
  106: *>                        to flops within the BLAS calls of a QZ sweep.
  107: *> \endverbatim
  108: *>
  109: *> \param[in] NAME
  110: *> \verbatim
  111: *>          NAME is CHARACTER string
  112: *>               Name of the calling subroutine
  113: *> \endverbatim
  114: *>
  115: *> \param[in] OPTS
  116: *> \verbatim
  117: *>          OPTS is CHARACTER string
  118: *>               This is a concatenation of the string arguments to
  119: *>               TTQRE.
  120: *> \endverbatim
  121: *>
  122: *> \param[in] N
  123: *> \verbatim
  124: *>          N is INTEGER
  125: *>               N is the order of the Hessenberg matrix H.
  126: *> \endverbatim
  127: *>
  128: *> \param[in] ILO
  129: *> \verbatim
  130: *>          ILO is INTEGER
  131: *> \endverbatim
  132: *>
  133: *> \param[in] IHI
  134: *> \verbatim
  135: *>          IHI is INTEGER
  136: *>               It is assumed that H is already upper triangular
  137: *>               in rows and columns 1:ILO-1 and IHI+1:N.
  138: *> \endverbatim
  139: *>
  140: *> \param[in] LWORK
  141: *> \verbatim
  142: *>          LWORK is INTEGER
  143: *>               The amount of workspace available.
  144: *> \endverbatim
  145: *
  146: *  Authors:
  147: *  ========
  148: *
  149: *> \author Univ. of Tennessee
  150: *> \author Univ. of California Berkeley
  151: *> \author Univ. of Colorado Denver
  152: *> \author NAG Ltd.
  153: *
  154: *> \ingroup OTHERauxiliary
  155: *
  156: *> \par Further Details:
  157: *  =====================
  158: *>
  159: *> \verbatim
  160: *>
  161: *>       Little is known about how best to choose these parameters.
  162: *>       It is possible to use different values of the parameters
  163: *>       for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR.
  164: *>
  165: *>       It is probably best to choose different parameters for
  166: *>       different matrices and different parameters at different
  167: *>       times during the iteration, but this has not been
  168: *>       implemented --- yet.
  169: *>
  170: *>
  171: *>       The best choices of most of the parameters depend
  172: *>       in an ill-understood way on the relative execution
  173: *>       rate of xLAQR3 and xLAQR5 and on the nature of each
  174: *>       particular eigenvalue problem.  Experiment may be the
  175: *>       only practical way to determine which choices are most
  176: *>       effective.
  177: *>
  178: *>       Following is a list of default values supplied by IPARMQ.
  179: *>       These defaults may be adjusted in order to attain better
  180: *>       performance in any particular computational environment.
  181: *>
  182: *>       IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point.
  183: *>                        Default: 75. (Must be at least 11.)
  184: *>
  185: *>       IPARMQ(ISPEC=13) Recommended deflation window size.
  186: *>                        This depends on ILO, IHI and NS, the
  187: *>                        number of simultaneous shifts returned
  188: *>                        by IPARMQ(ISPEC=15).  The default for
  189: *>                        (IHI-ILO+1) <= 500 is NS.  The default
  190: *>                        for (IHI-ILO+1) > 500 is 3*NS/2.
  191: *>
  192: *>       IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14.
  193: *>
  194: *>       IPARMQ(ISPEC=15) Number of simultaneous shifts, NS.
  195: *>                        a multi-shift QR iteration.
  196: *>
  197: *>                        If IHI-ILO+1 is ...
  198: *>
  199: *>                        greater than      ...but less    ... the
  200: *>                        or equal to ...      than        default is
  201: *>
  202: *>                                0               30       NS =   2+
  203: *>                               30               60       NS =   4+
  204: *>                               60              150       NS =  10
  205: *>                              150              590       NS =  **
  206: *>                              590             3000       NS =  64
  207: *>                             3000             6000       NS = 128
  208: *>                             6000             infinity   NS = 256
  209: *>
  210: *>                    (+)  By default matrices of this order are
  211: *>                         passed to the implicit double shift routine
  212: *>                         xLAHQR.  See IPARMQ(ISPEC=12) above.   These
  213: *>                         values of NS are used only in case of a rare
  214: *>                         xLAHQR failure.
  215: *>
  216: *>                    (**) The asterisks (**) indicate an ad-hoc
  217: *>                         function increasing from 10 to 64.
  218: *>
  219: *>       IPARMQ(ISPEC=16) Select structured matrix multiply.
  220: *>                        (See ISPEC=16 above for details.)
  221: *>                        Default: 3.
  222: *>
  223: *>       IPARMQ(ISPEC=17) Relative cost heuristic for blocksize selection.
  224: *>                        Expressed as a percentage.
  225: *>                        Default: 10.
  226: *> \endverbatim
  227: *>
  228: *  =====================================================================
  229:       INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
  230: *
  231: *  -- LAPACK auxiliary routine --
  232: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  233: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  234: *
  235: *     .. Scalar Arguments ..
  236:       INTEGER            IHI, ILO, ISPEC, LWORK, N
  237:       CHARACTER          NAME*( * ), OPTS*( * )
  238: *
  239: *  ================================================================
  240: *     .. Parameters ..
  241:       INTEGER            INMIN, INWIN, INIBL, ISHFTS, IACC22, ICOST
  242:       PARAMETER          ( INMIN = 12, INWIN = 13, INIBL = 14,
  243:      $                   ISHFTS = 15, IACC22 = 16, ICOST = 17 )
  244:       INTEGER            NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP, RCOST
  245:       PARAMETER          ( NMIN = 75, K22MIN = 14, KACMIN = 14,
  246:      $                   NIBBLE = 14, KNWSWP = 500, RCOST = 10 )
  247:       REAL               TWO
  248:       PARAMETER          ( TWO = 2.0 )
  249: *     ..
  250: *     .. Local Scalars ..
  251:       INTEGER            NH, NS
  252:       INTEGER            I, IC, IZ
  253:       CHARACTER          SUBNAM*6
  254: *     ..
  255: *     .. Intrinsic Functions ..
  256:       INTRINSIC          LOG, MAX, MOD, NINT, REAL
  257: *     ..
  258: *     .. Executable Statements ..
  259:       IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR.
  260:      $    ( ISPEC.EQ.IACC22 ) ) THEN
  261: *
  262: *        ==== Set the number simultaneous shifts ====
  263: *
  264:          NH = IHI - ILO + 1
  265:          NS = 2
  266:          IF( NH.GE.30 )
  267:      $      NS = 4
  268:          IF( NH.GE.60 )
  269:      $      NS = 10
  270:          IF( NH.GE.150 )
  271:      $      NS = MAX( 10, NH / NINT( LOG( REAL( NH ) ) / LOG( TWO ) ) )
  272:          IF( NH.GE.590 )
  273:      $      NS = 64
  274:          IF( NH.GE.3000 )
  275:      $      NS = 128
  276:          IF( NH.GE.6000 )
  277:      $      NS = 256
  278:          NS = MAX( 2, NS-MOD( NS, 2 ) )
  279:       END IF
  280: *
  281:       IF( ISPEC.EQ.INMIN ) THEN
  282: *
  283: *
  284: *        ===== Matrices of order smaller than NMIN get sent
  285: *        .     to xLAHQR, the classic double shift algorithm.
  286: *        .     This must be at least 11. ====
  287: *
  288:          IPARMQ = NMIN
  289: *
  290:       ELSE IF( ISPEC.EQ.INIBL ) THEN
  291: *
  292: *        ==== INIBL: skip a multi-shift qr iteration and
  293: *        .    whenever aggressive early deflation finds
  294: *        .    at least (NIBBLE*(window size)/100) deflations. ====
  295: *
  296:          IPARMQ = NIBBLE
  297: *
  298:       ELSE IF( ISPEC.EQ.ISHFTS ) THEN
  299: *
  300: *        ==== NSHFTS: The number of simultaneous shifts =====
  301: *
  302:          IPARMQ = NS
  303: *
  304:       ELSE IF( ISPEC.EQ.INWIN ) THEN
  305: *
  306: *        ==== NW: deflation window size.  ====
  307: *
  308:          IF( NH.LE.KNWSWP ) THEN
  309:             IPARMQ = NS
  310:          ELSE
  311:             IPARMQ = 3*NS / 2
  312:          END IF
  313: *
  314:       ELSE IF( ISPEC.EQ.IACC22 ) THEN
  315: *
  316: *        ==== IACC22: Whether to accumulate reflections
  317: *        .     before updating the far-from-diagonal elements
  318: *        .     and whether to use 2-by-2 block structure while
  319: *        .     doing it.  A small amount of work could be saved
  320: *        .     by making this choice dependent also upon the
  321: *        .     NH=IHI-ILO+1.
  322: *
  323: *
  324: *        Convert NAME to upper case if the first character is lower case.
  325: *
  326:          IPARMQ = 0
  327:          SUBNAM = NAME
  328:          IC = ICHAR( SUBNAM( 1: 1 ) )
  329:          IZ = ICHAR( 'Z' )
  330:          IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
  331: *
  332: *           ASCII character set
  333: *
  334:             IF( IC.GE.97 .AND. IC.LE.122 ) THEN
  335:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  336:                DO I = 2, 6
  337:                   IC = ICHAR( SUBNAM( I: I ) )
  338:                   IF( IC.GE.97 .AND. IC.LE.122 )
  339:      $               SUBNAM( I: I ) = CHAR( IC-32 )
  340:                END DO
  341:             END IF
  342: *
  343:          ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
  344: *
  345: *           EBCDIC character set
  346: *
  347:             IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  348:      $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  349:      $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
  350:                SUBNAM( 1: 1 ) = CHAR( IC+64 )
  351:                DO I = 2, 6
  352:                   IC = ICHAR( SUBNAM( I: I ) )
  353:                   IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  354:      $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  355:      $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
  356:      $                I ) = CHAR( IC+64 )
  357:                END DO
  358:             END IF
  359: *
  360:          ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
  361: *
  362: *           Prime machines:  ASCII+128
  363: *
  364:             IF( IC.GE.225 .AND. IC.LE.250 ) THEN
  365:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  366:                DO I = 2, 6
  367:                   IC = ICHAR( SUBNAM( I: I ) )
  368:                   IF( IC.GE.225 .AND. IC.LE.250 )
  369:      $               SUBNAM( I: I ) = CHAR( IC-32 )
  370:                END DO
  371:             END IF
  372:          END IF
  373: *
  374:          IF( SUBNAM( 2:6 ).EQ.'GGHRD' .OR.
  375:      $       SUBNAM( 2:6 ).EQ.'GGHD3' ) THEN
  376:             IPARMQ = 1
  377:             IF( NH.GE.K22MIN )
  378:      $         IPARMQ = 2
  379:          ELSE IF ( SUBNAM( 4:6 ).EQ.'EXC' ) THEN
  380:             IF( NH.GE.KACMIN )
  381:      $         IPARMQ = 1
  382:             IF( NH.GE.K22MIN )
  383:      $         IPARMQ = 2
  384:          ELSE IF ( SUBNAM( 2:6 ).EQ.'HSEQR' .OR.
  385:      $             SUBNAM( 2:5 ).EQ.'LAQR' ) THEN
  386:             IF( NS.GE.KACMIN )
  387:      $         IPARMQ = 1
  388:             IF( NS.GE.K22MIN )
  389:      $         IPARMQ = 2
  390:          END IF
  391: *
  392:       ELSE IF( ISPEC.EQ.ICOST ) THEN
  393: *
  394: *        === Relative cost of near-the-diagonal chase vs
  395: *            BLAS updates ===
  396: *
  397:          IPARMQ = RCOST
  398:       ELSE
  399: *        ===== invalid value of ispec =====
  400:          IPARMQ = -1
  401: *
  402:       END IF
  403: *
  404: *     ==== End of IPARMQ ====
  405: *
  406:       END

CVSweb interface <joel.bertrand@systella.fr>