File:  [local] / rpl / lapack / lapack / iparmq.f
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:46 2010 UTC (14 years, 4 months ago) by bertrand
Branches: MAIN
CVS tags: HEAD
Initial revision

    1:       INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
    2: *
    3: *  -- LAPACK auxiliary routine (version 3.2) --
    4: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
    5: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
    6: *     November 2006
    7: *     
    8: *     .. Scalar Arguments ..
    9:       INTEGER            IHI, ILO, ISPEC, LWORK, N
   10:       CHARACTER          NAME*( * ), OPTS*( * )
   11: *
   12: *  Purpose
   13: *  =======
   14: *
   15: *       This program sets problem and machine dependent parameters
   16: *       useful for xHSEQR and its subroutines. It is called whenever 
   17: *       ILAENV is called with 12 <= ISPEC <= 16
   18: *
   19: *  Arguments
   20: *  =========
   21: *
   22: *       ISPEC  (input) integer scalar
   23: *              ISPEC specifies which tunable parameter IPARMQ should
   24: *              return.
   25: *
   26: *              ISPEC=12: (INMIN)  Matrices of order nmin or less
   27: *                        are sent directly to xLAHQR, the implicit
   28: *                        double shift QR algorithm.  NMIN must be
   29: *                        at least 11.
   30: *
   31: *              ISPEC=13: (INWIN)  Size of the deflation window.
   32: *                        This is best set greater than or equal to
   33: *                        the number of simultaneous shifts NS.
   34: *                        Larger matrices benefit from larger deflation
   35: *                        windows.
   36: *
   37: *              ISPEC=14: (INIBL) Determines when to stop nibbling and
   38: *                        invest in an (expensive) multi-shift QR sweep.
   39: *                        If the aggressive early deflation subroutine
   40: *                        finds LD converged eigenvalues from an order
   41: *                        NW deflation window and LD.GT.(NW*NIBBLE)/100,
   42: *                        then the next QR sweep is skipped and early
   43: *                        deflation is applied immediately to the
   44: *                        remaining active diagonal block.  Setting
   45: *                        IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a
   46: *                        multi-shift QR sweep whenever early deflation
   47: *                        finds a converged eigenvalue.  Setting
   48: *                        IPARMQ(ISPEC=14) greater than or equal to 100
   49: *                        prevents TTQRE from skipping a multi-shift
   50: *                        QR sweep.
   51: *
   52: *              ISPEC=15: (NSHFTS) The number of simultaneous shifts in
   53: *                        a multi-shift QR iteration.
   54: *
   55: *              ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the
   56: *                        following meanings.
   57: *                        0:  During the multi-shift QR sweep,
   58: *                            xLAQR5 does not accumulate reflections and
   59: *                            does not use matrix-matrix multiply to
   60: *                            update the far-from-diagonal matrix
   61: *                            entries.
   62: *                        1:  During the multi-shift QR sweep,
   63: *                            xLAQR5 and/or xLAQRaccumulates reflections and uses
   64: *                            matrix-matrix multiply to update the
   65: *                            far-from-diagonal matrix entries.
   66: *                        2:  During the multi-shift QR sweep.
   67: *                            xLAQR5 accumulates reflections and takes
   68: *                            advantage of 2-by-2 block structure during
   69: *                            matrix-matrix multiplies.
   70: *                        (If xTRMM is slower than xGEMM, then
   71: *                        IPARMQ(ISPEC=16)=1 may be more efficient than
   72: *                        IPARMQ(ISPEC=16)=2 despite the greater level of
   73: *                        arithmetic work implied by the latter choice.)
   74: *
   75: *       NAME    (input) character string
   76: *               Name of the calling subroutine
   77: *
   78: *       OPTS    (input) character string
   79: *               This is a concatenation of the string arguments to
   80: *               TTQRE.
   81: *
   82: *       N       (input) integer scalar
   83: *               N is the order of the Hessenberg matrix H.
   84: *
   85: *       ILO     (input) INTEGER
   86: *       IHI     (input) INTEGER
   87: *               It is assumed that H is already upper triangular
   88: *               in rows and columns 1:ILO-1 and IHI+1:N.
   89: *
   90: *       LWORK   (input) integer scalar
   91: *               The amount of workspace available.
   92: *
   93: *  Further Details
   94: *  ===============
   95: *
   96: *       Little is known about how best to choose these parameters.
   97: *       It is possible to use different values of the parameters
   98: *       for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR.
   99: *
  100: *       It is probably best to choose different parameters for
  101: *       different matrices and different parameters at different
  102: *       times during the iteration, but this has not been
  103: *       implemented --- yet.
  104: *
  105: *
  106: *       The best choices of most of the parameters depend
  107: *       in an ill-understood way on the relative execution
  108: *       rate of xLAQR3 and xLAQR5 and on the nature of each
  109: *       particular eigenvalue problem.  Experiment may be the
  110: *       only practical way to determine which choices are most
  111: *       effective.
  112: *
  113: *       Following is a list of default values supplied by IPARMQ.
  114: *       These defaults may be adjusted in order to attain better
  115: *       performance in any particular computational environment.
  116: *
  117: *       IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point.
  118: *                        Default: 75. (Must be at least 11.)
  119: *
  120: *       IPARMQ(ISPEC=13) Recommended deflation window size.
  121: *                        This depends on ILO, IHI and NS, the
  122: *                        number of simultaneous shifts returned
  123: *                        by IPARMQ(ISPEC=15).  The default for
  124: *                        (IHI-ILO+1).LE.500 is NS.  The default
  125: *                        for (IHI-ILO+1).GT.500 is 3*NS/2.
  126: *
  127: *       IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14.
  128: *
  129: *       IPARMQ(ISPEC=15) Number of simultaneous shifts, NS.
  130: *                        a multi-shift QR iteration.
  131: *
  132: *                        If IHI-ILO+1 is ...
  133: *
  134: *                        greater than      ...but less    ... the
  135: *                        or equal to ...      than        default is
  136: *
  137: *                                0               30       NS =   2+
  138: *                               30               60       NS =   4+
  139: *                               60              150       NS =  10
  140: *                              150              590       NS =  **
  141: *                              590             3000       NS =  64
  142: *                             3000             6000       NS = 128
  143: *                             6000             infinity   NS = 256
  144: *
  145: *                    (+)  By default matrices of this order are
  146: *                         passed to the implicit double shift routine
  147: *                         xLAHQR.  See IPARMQ(ISPEC=12) above.   These
  148: *                         values of NS are used only in case of a rare
  149: *                         xLAHQR failure.
  150: *
  151: *                    (**) The asterisks (**) indicate an ad-hoc
  152: *                         function increasing from 10 to 64.
  153: *
  154: *       IPARMQ(ISPEC=16) Select structured matrix multiply.
  155: *                        (See ISPEC=16 above for details.)
  156: *                        Default: 3.
  157: *
  158: *     ================================================================
  159: *     .. Parameters ..
  160:       INTEGER            INMIN, INWIN, INIBL, ISHFTS, IACC22
  161:       PARAMETER          ( INMIN = 12, INWIN = 13, INIBL = 14,
  162:      $                   ISHFTS = 15, IACC22 = 16 )
  163:       INTEGER            NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP
  164:       PARAMETER          ( NMIN = 75, K22MIN = 14, KACMIN = 14,
  165:      $                   NIBBLE = 14, KNWSWP = 500 )
  166:       REAL               TWO
  167:       PARAMETER          ( TWO = 2.0 )
  168: *     ..
  169: *     .. Local Scalars ..
  170:       INTEGER            NH, NS
  171: *     ..
  172: *     .. Intrinsic Functions ..
  173:       INTRINSIC          LOG, MAX, MOD, NINT, REAL
  174: *     ..
  175: *     .. Executable Statements ..
  176:       IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR.
  177:      $    ( ISPEC.EQ.IACC22 ) ) THEN
  178: *
  179: *        ==== Set the number simultaneous shifts ====
  180: *
  181:          NH = IHI - ILO + 1
  182:          NS = 2
  183:          IF( NH.GE.30 )
  184:      $      NS = 4
  185:          IF( NH.GE.60 )
  186:      $      NS = 10
  187:          IF( NH.GE.150 )
  188:      $      NS = MAX( 10, NH / NINT( LOG( REAL( NH ) ) / LOG( TWO ) ) )
  189:          IF( NH.GE.590 )
  190:      $      NS = 64
  191:          IF( NH.GE.3000 )
  192:      $      NS = 128
  193:          IF( NH.GE.6000 )
  194:      $      NS = 256
  195:          NS = MAX( 2, NS-MOD( NS, 2 ) )
  196:       END IF
  197: *
  198:       IF( ISPEC.EQ.INMIN ) THEN
  199: *
  200: *
  201: *        ===== Matrices of order smaller than NMIN get sent
  202: *        .     to xLAHQR, the classic double shift algorithm.
  203: *        .     This must be at least 11. ====
  204: *
  205:          IPARMQ = NMIN
  206: *
  207:       ELSE IF( ISPEC.EQ.INIBL ) THEN
  208: *
  209: *        ==== INIBL: skip a multi-shift qr iteration and
  210: *        .    whenever aggressive early deflation finds
  211: *        .    at least (NIBBLE*(window size)/100) deflations. ====
  212: *
  213:          IPARMQ = NIBBLE
  214: *
  215:       ELSE IF( ISPEC.EQ.ISHFTS ) THEN
  216: *
  217: *        ==== NSHFTS: The number of simultaneous shifts =====
  218: *
  219:          IPARMQ = NS
  220: *
  221:       ELSE IF( ISPEC.EQ.INWIN ) THEN
  222: *
  223: *        ==== NW: deflation window size.  ====
  224: *
  225:          IF( NH.LE.KNWSWP ) THEN
  226:             IPARMQ = NS
  227:          ELSE
  228:             IPARMQ = 3*NS / 2
  229:          END IF
  230: *
  231:       ELSE IF( ISPEC.EQ.IACC22 ) THEN
  232: *
  233: *        ==== IACC22: Whether to accumulate reflections
  234: *        .     before updating the far-from-diagonal elements
  235: *        .     and whether to use 2-by-2 block structure while
  236: *        .     doing it.  A small amount of work could be saved
  237: *        .     by making this choice dependent also upon the
  238: *        .     NH=IHI-ILO+1.
  239: *
  240:          IPARMQ = 0
  241:          IF( NS.GE.KACMIN )
  242:      $      IPARMQ = 1
  243:          IF( NS.GE.K22MIN )
  244:      $      IPARMQ = 2
  245: *
  246:       ELSE
  247: *        ===== invalid value of ispec =====
  248:          IPARMQ = -1
  249: *
  250:       END IF
  251: *
  252: *     ==== End of IPARMQ ====
  253: *
  254:       END

CVSweb interface <joel.bertrand@systella.fr>