rpl/lapack/lapack/dlaqr5.f - view

Return to dlaqr5.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / dlaqr5.f
Revision 1.23: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:38:56 2023 UTC (9 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD

Première mise à jour de lapack et blas.

    1: *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at
    6: *            http://www.netlib.org/lapack/explore-html/
    7: *
    8: *> \htmlonly
    9: *> Download DLAQR5 + dependencies
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
   11: *> [TGZ]</a>
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
   15: *> [TXT]</a>
   16: *> \endhtmlonly
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
   22: *                          SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
   23: *                          LDU, NV, WV, LDWV, NH, WH, LDWH )
   24: *
   25: *       .. Scalar Arguments ..
   26: *       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
   27: *      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
   28: *       LOGICAL            WANTT, WANTZ
   29: *       ..
   30: *       .. Array Arguments ..
   31: *       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
   32: *      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
   33: *      $                   Z( LDZ, * )
   34: *       ..
   35: *
   36: *
   37: *> \par Purpose:
   38: *  =============
   39: *>
   40: *> \verbatim
   41: *>
   42: *>    DLAQR5, called by DLAQR0, performs a
   43: *>    single small-bulge multi-shift QR sweep.
   44: *> \endverbatim
   45: *
   46: *  Arguments:
   47: *  ==========
   48: *
   49: *> \param[in] WANTT
   50: *> \verbatim
   51: *>          WANTT is LOGICAL
   52: *>             WANTT = .true. if the quasi-triangular Schur factor
   53: *>             is being computed.  WANTT is set to .false. otherwise.
   54: *> \endverbatim
   55: *>
   56: *> \param[in] WANTZ
   57: *> \verbatim
   58: *>          WANTZ is LOGICAL
   59: *>             WANTZ = .true. if the orthogonal Schur factor is being
   60: *>             computed.  WANTZ is set to .false. otherwise.
   61: *> \endverbatim
   62: *>
   63: *> \param[in] KACC22
   64: *> \verbatim
   65: *>          KACC22 is INTEGER with value 0, 1, or 2.
   66: *>             Specifies the computation mode of far-from-diagonal
   67: *>             orthogonal updates.
   68: *>        = 0: DLAQR5 does not accumulate reflections and does not
   69: *>             use matrix-matrix multiply to update far-from-diagonal
   70: *>             matrix entries.
   71: *>        = 1: DLAQR5 accumulates reflections and uses matrix-matrix
   72: *>             multiply to update the far-from-diagonal matrix entries.
   73: *>        = 2: Same as KACC22 = 1. This option used to enable exploiting
   74: *>             the 2-by-2 structure during matrix multiplications, but
   75: *>             this is no longer supported.
   76: *> \endverbatim
   77: *>
   78: *> \param[in] N
   79: *> \verbatim
   80: *>          N is INTEGER
   81: *>             N is the order of the Hessenberg matrix H upon which this
   82: *>             subroutine operates.
   83: *> \endverbatim
   84: *>
   85: *> \param[in] KTOP
   86: *> \verbatim
   87: *>          KTOP is INTEGER
   88: *> \endverbatim
   89: *>
   90: *> \param[in] KBOT
   91: *> \verbatim
   92: *>          KBOT is INTEGER
   93: *>             These are the first and last rows and columns of an
   94: *>             isolated diagonal block upon which the QR sweep is to be
   95: *>             applied. It is assumed without a check that
   96: *>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
   97: *>             and
   98: *>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
   99: *> \endverbatim
  100: *>
  101: *> \param[in] NSHFTS
  102: *> \verbatim
  103: *>          NSHFTS is INTEGER
  104: *>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
  105: *>             must be positive and even.
  106: *> \endverbatim
  107: *>
  108: *> \param[in,out] SR
  109: *> \verbatim
  110: *>          SR is DOUBLE PRECISION array, dimension (NSHFTS)
  111: *> \endverbatim
  112: *>
  113: *> \param[in,out] SI
  114: *> \verbatim
  115: *>          SI is DOUBLE PRECISION array, dimension (NSHFTS)
  116: *>             SR contains the real parts and SI contains the imaginary
  117: *>             parts of the NSHFTS shifts of origin that define the
  118: *>             multi-shift QR sweep.  On output SR and SI may be
  119: *>             reordered.
  120: *> \endverbatim
  121: *>
  122: *> \param[in,out] H
  123: *> \verbatim
  124: *>          H is DOUBLE PRECISION array, dimension (LDH,N)
  125: *>             On input H contains a Hessenberg matrix.  On output a
  126: *>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
  127: *>             to the isolated diagonal block in rows and columns KTOP
  128: *>             through KBOT.
  129: *> \endverbatim
  130: *>
  131: *> \param[in] LDH
  132: *> \verbatim
  133: *>          LDH is INTEGER
  134: *>             LDH is the leading dimension of H just as declared in the
  135: *>             calling procedure.  LDH >= MAX(1,N).
  136: *> \endverbatim
  137: *>
  138: *> \param[in] ILOZ
  139: *> \verbatim
  140: *>          ILOZ is INTEGER
  141: *> \endverbatim
  142: *>
  143: *> \param[in] IHIZ
  144: *> \verbatim
  145: *>          IHIZ is INTEGER
  146: *>             Specify the rows of Z to which transformations must be
  147: *>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
  148: *> \endverbatim
  149: *>
  150: *> \param[in,out] Z
  151: *> \verbatim
  152: *>          Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
  153: *>             If WANTZ = .TRUE., then the QR Sweep orthogonal
  154: *>             similarity transformation is accumulated into
  155: *>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
  156: *>             If WANTZ = .FALSE., then Z is unreferenced.
  157: *> \endverbatim
  158: *>
  159: *> \param[in] LDZ
  160: *> \verbatim
  161: *>          LDZ is INTEGER
  162: *>             LDA is the leading dimension of Z just as declared in
  163: *>             the calling procedure. LDZ >= N.
  164: *> \endverbatim
  165: *>
  166: *> \param[out] V
  167: *> \verbatim
  168: *>          V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
  169: *> \endverbatim
  170: *>
  171: *> \param[in] LDV
  172: *> \verbatim
  173: *>          LDV is INTEGER
  174: *>             LDV is the leading dimension of V as declared in the
  175: *>             calling procedure.  LDV >= 3.
  176: *> \endverbatim
  177: *>
  178: *> \param[out] U
  179: *> \verbatim
  180: *>          U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
  181: *> \endverbatim
  182: *>
  183: *> \param[in] LDU
  184: *> \verbatim
  185: *>          LDU is INTEGER
  186: *>             LDU is the leading dimension of U just as declared in the
  187: *>             in the calling subroutine.  LDU >= 2*NSHFTS.
  188: *> \endverbatim
  189: *>
  190: *> \param[in] NV
  191: *> \verbatim
  192: *>          NV is INTEGER
  193: *>             NV is the number of rows in WV agailable for workspace.
  194: *>             NV >= 1.
  195: *> \endverbatim
  196: *>
  197: *> \param[out] WV
  198: *> \verbatim
  199: *>          WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
  200: *> \endverbatim
  201: *>
  202: *> \param[in] LDWV
  203: *> \verbatim
  204: *>          LDWV is INTEGER
  205: *>             LDWV is the leading dimension of WV as declared in the
  206: *>             in the calling subroutine.  LDWV >= NV.
  207: *> \endverbatim
  208: *
  209: *> \param[in] NH
  210: *> \verbatim
  211: *>          NH is INTEGER
  212: *>             NH is the number of columns in array WH available for
  213: *>             workspace. NH >= 1.
  214: *> \endverbatim
  215: *>
  216: *> \param[out] WH
  217: *> \verbatim
  218: *>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
  219: *> \endverbatim
  220: *>
  221: *> \param[in] LDWH
  222: *> \verbatim
  223: *>          LDWH is INTEGER
  224: *>             Leading dimension of WH just as declared in the
  225: *>             calling procedure.  LDWH >= 2*NSHFTS.
  226: *> \endverbatim
  227: *>
  228: *  Authors:
  229: *  ========
  230: *
  231: *> \author Univ. of Tennessee
  232: *> \author Univ. of California Berkeley
  233: *> \author Univ. of Colorado Denver
  234: *> \author NAG Ltd.
  235: *
  236: *> \ingroup doubleOTHERauxiliary
  237: *
  238: *> \par Contributors:
  239: *  ==================
  240: *>
  241: *>       Karen Braman and Ralph Byers, Department of Mathematics,
  242: *>       University of Kansas, USA
  243: *>
  244: *>       Lars Karlsson, Daniel Kressner, and Bruno Lang
  245: *>
  246: *>       Thijs Steel, Department of Computer science,
  247: *>       KU Leuven, Belgium
  248: *
  249: *> \par References:
  250: *  ================
  251: *>
  252: *>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
  253: *>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
  254: *>       Performance, SIAM Journal of Matrix Analysis, volume 23, pages
  255: *>       929--947, 2002.
  256: *>
  257: *>       Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
  258: *>       chains of bulges in multishift QR algorithms.
  259: *>       ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
  260: *>
  261: *  =====================================================================
  262:       SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
  263:      $                   SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
  264:      $                   LDU, NV, WV, LDWV, NH, WH, LDWH )
  265:       IMPLICIT NONE
  266: *
  267: *  -- LAPACK auxiliary routine --
  268: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  269: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  270: *
  271: *     .. Scalar Arguments ..
  272:       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  273:      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  274:       LOGICAL            WANTT, WANTZ
  275: *     ..
  276: *     .. Array Arguments ..
  277:       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
  278:      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
  279:      $                   Z( LDZ, * )
  280: *     ..
  281: *
  282: *  ================================================================
  283: *     .. Parameters ..
  284:       DOUBLE PRECISION   ZERO, ONE
  285:       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
  286: *     ..
  287: *     .. Local Scalars ..
  288:       DOUBLE PRECISION   ALPHA, BETA, H11, H12, H21, H22, REFSUM,
  289:      $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
  290:      $                   T3, TST1, TST2, ULP
  291:       INTEGER            I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
  292:      $                   JROW, JTOP, K, K1, KDU, KMS, KRCOL,
  293:      $                   M, M22, MBOT, MTOP, NBMPS, NDCOL,
  294:      $                   NS, NU
  295:       LOGICAL            ACCUM, BMP22
  296: *     ..
  297: *     .. External Functions ..
  298:       DOUBLE PRECISION   DLAMCH
  299:       EXTERNAL           DLAMCH
  300: *     ..
  301: *     .. Intrinsic Functions ..
  302: *
  303:       INTRINSIC          ABS, DBLE, MAX, MIN, MOD
  304: *     ..
  305: *     .. Local Arrays ..
  306:       DOUBLE PRECISION   VT( 3 )
  307: *     ..
  308: *     .. External Subroutines ..
  309:       EXTERNAL           DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
  310:      $                   DTRMM
  311: *     ..
  312: *     .. Executable Statements ..
  313: *
  314: *     ==== If there are no shifts, then there is nothing to do. ====
  315: *
  316:       IF( NSHFTS.LT.2 )
  317:      $   RETURN
  318: *
  319: *     ==== If the active block is empty or 1-by-1, then there
  320: *     .    is nothing to do. ====
  321: *
  322:       IF( KTOP.GE.KBOT )
  323:      $   RETURN
  324: *
  325: *     ==== Shuffle shifts into pairs of real shifts and pairs
  326: *     .    of complex conjugate shifts assuming complex
  327: *     .    conjugate shifts are already adjacent to one
  328: *     .    another. ====
  329: *
  330:       DO 10 I = 1, NSHFTS - 2, 2
  331:          IF( SI( I ).NE.-SI( I+1 ) ) THEN
  332: *
  333:             SWAP = SR( I )
  334:             SR( I ) = SR( I+1 )
  335:             SR( I+1 ) = SR( I+2 )
  336:             SR( I+2 ) = SWAP
  337: *
  338:             SWAP = SI( I )
  339:             SI( I ) = SI( I+1 )
  340:             SI( I+1 ) = SI( I+2 )
  341:             SI( I+2 ) = SWAP
  342:          END IF
  343:    10 CONTINUE
  344: *
  345: *     ==== NSHFTS is supposed to be even, but if it is odd,
  346: *     .    then simply reduce it by one.  The shuffle above
  347: *     .    ensures that the dropped shift is real and that
  348: *     .    the remaining shifts are paired. ====
  349: *
  350:       NS = NSHFTS - MOD( NSHFTS, 2 )
  351: *
  352: *     ==== Machine constants for deflation ====
  353: *
  354:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
  355:       SAFMAX = ONE / SAFMIN
  356:       CALL DLABAD( SAFMIN, SAFMAX )
  357:       ULP = DLAMCH( 'PRECISION' )
  358:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
  359: *
  360: *     ==== Use accumulated reflections to update far-from-diagonal
  361: *     .    entries ? ====
  362: *
  363:       ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
  364: *
  365: *     ==== clear trash ====
  366: *
  367:       IF( KTOP+2.LE.KBOT )
  368:      $   H( KTOP+2, KTOP ) = ZERO
  369: *
  370: *     ==== NBMPS = number of 2-shift bulges in the chain ====
  371: *
  372:       NBMPS = NS / 2
  373: *
  374: *     ==== KDU = width of slab ====
  375: *
  376:       KDU = 4*NBMPS
  377: *
  378: *     ==== Create and chase chains of NBMPS bulges ====
  379: *
  380:       DO 180 INCOL = KTOP - 2*NBMPS + 1, KBOT - 2, 2*NBMPS
  381: *
  382: *        JTOP = Index from which updates from the right start.
  383: *
  384:          IF( ACCUM ) THEN
  385:             JTOP = MAX( KTOP, INCOL )
  386:          ELSE IF( WANTT ) THEN
  387:             JTOP = 1
  388:          ELSE
  389:             JTOP = KTOP
  390:          END IF
  391: *
  392:          NDCOL = INCOL + KDU
  393:          IF( ACCUM )
  394:      $      CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
  395: *
  396: *        ==== Near-the-diagonal bulge chase.  The following loop
  397: *        .    performs the near-the-diagonal part of a small bulge
  398: *        .    multi-shift QR sweep.  Each 4*NBMPS column diagonal
  399: *        .    chunk extends from column INCOL to column NDCOL
  400: *        .    (including both column INCOL and column NDCOL). The
  401: *        .    following loop chases a 2*NBMPS+1 column long chain of
  402: *        .    NBMPS bulges 2*NBMPS columns to the right.  (INCOL
  403: *        .    may be less than KTOP and and NDCOL may be greater than
  404: *        .    KBOT indicating phantom columns from which to chase
  405: *        .    bulges before they are actually introduced or to which
  406: *        .    to chase bulges beyond column KBOT.)  ====
  407: *
  408:          DO 145 KRCOL = INCOL, MIN( INCOL+2*NBMPS-1, KBOT-2 )
  409: *
  410: *           ==== Bulges number MTOP to MBOT are active double implicit
  411: *           .    shift bulges.  There may or may not also be small
  412: *           .    2-by-2 bulge, if there is room.  The inactive bulges
  413: *           .    (if any) must wait until the active bulges have moved
  414: *           .    down the diagonal to make room.  The phantom matrix
  415: *           .    paradigm described above helps keep track.  ====
  416: *
  417:             MTOP = MAX( 1, ( KTOP-KRCOL ) / 2+1 )
  418:             MBOT = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 2 )
  419:             M22 = MBOT + 1
  420:             BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+2*( M22-1 ) ).EQ.
  421:      $              ( KBOT-2 )
  422: *
  423: *           ==== Generate reflections to chase the chain right
  424: *           .    one column.  (The minimum value of K is KTOP-1.) ====
  425: *
  426:             IF ( BMP22 ) THEN
  427: *
  428: *              ==== Special case: 2-by-2 reflection at bottom treated
  429: *              .    separately ====
  430: *
  431:                K = KRCOL + 2*( M22-1 )
  432:                IF( K.EQ.KTOP-1 ) THEN
  433:                   CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
  434:      $                         SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
  435:      $                         V( 1, M22 ) )
  436:                   BETA = V( 1, M22 )
  437:                   CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  438:                ELSE
  439:                   BETA = H( K+1, K )
  440:                   V( 2, M22 ) = H( K+2, K )
  441:                   CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  442:                   H( K+1, K ) = BETA
  443:                   H( K+2, K ) = ZERO
  444:                END IF
  445: 
  446: *
  447: *              ==== Perform update from right within 
  448: *              .    computational window. ====
  449: *
  450:                T1 = V( 1, M22 )
  451:                T2 = T1*V( 2, M22 )
  452:                DO 30 J = JTOP, MIN( KBOT, K+3 )
  453:                   REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
  454:                   H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
  455:                   H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
  456:    30          CONTINUE
  457: *
  458: *              ==== Perform update from left within 
  459: *              .    computational window. ====
  460: *
  461:                IF( ACCUM ) THEN
  462:                   JBOT = MIN( NDCOL, KBOT )
  463:                ELSE IF( WANTT ) THEN
  464:                   JBOT = N
  465:                ELSE
  466:                   JBOT = KBOT
  467:                END IF
  468:                T1 = V( 1, M22 )
  469:                T2 = T1*V( 2, M22 )
  470:                DO 40 J = K+1, JBOT
  471:                   REFSUM = H( K+1, J ) + V( 2, M22 )*H( K+2, J )
  472:                   H( K+1, J ) = H( K+1, J ) - REFSUM*T1
  473:                   H( K+2, J ) = H( K+2, J ) - REFSUM*T2
  474:    40          CONTINUE
  475: *
  476: *              ==== The following convergence test requires that
  477: *              .    the tradition small-compared-to-nearby-diagonals
  478: *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
  479: *              .    criteria both be satisfied.  The latter improves
  480: *              .    accuracy in some examples. Falling back on an
  481: *              .    alternate convergence criterion when TST1 or TST2
  482: *              .    is zero (as done here) is traditional but probably
  483: *              .    unnecessary. ====
  484: *
  485:                IF( K.GE.KTOP ) THEN
  486:                   IF( H( K+1, K ).NE.ZERO ) THEN
  487:                      TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
  488:                      IF( TST1.EQ.ZERO ) THEN
  489:                         IF( K.GE.KTOP+1 )
  490:      $                     TST1 = TST1 + ABS( H( K, K-1 ) )
  491:                         IF( K.GE.KTOP+2 )
  492:      $                     TST1 = TST1 + ABS( H( K, K-2 ) )
  493:                         IF( K.GE.KTOP+3 )
  494:      $                     TST1 = TST1 + ABS( H( K, K-3 ) )
  495:                         IF( K.LE.KBOT-2 )
  496:      $                     TST1 = TST1 + ABS( H( K+2, K+1 ) )
  497:                         IF( K.LE.KBOT-3 )
  498:      $                     TST1 = TST1 + ABS( H( K+3, K+1 ) )
  499:                         IF( K.LE.KBOT-4 )
  500:      $                     TST1 = TST1 + ABS( H( K+4, K+1 ) )
  501:                      END IF
  502:                      IF( ABS( H( K+1, K ) )
  503:      $                   .LE.MAX( SMLNUM, ULP*TST1 ) ) THEN
  504:                         H12 = MAX( ABS( H( K+1, K ) ),
  505:      $                             ABS( H( K, K+1 ) ) )
  506:                         H21 = MIN( ABS( H( K+1, K ) ),
  507:      $                             ABS( H( K, K+1 ) ) )
  508:                         H11 = MAX( ABS( H( K+1, K+1 ) ),
  509:      $                             ABS( H( K, K )-H( K+1, K+1 ) ) )
  510:                         H22 = MIN( ABS( H( K+1, K+1 ) ),
  511:      $                        ABS( H( K, K )-H( K+1, K+1 ) ) )
  512:                         SCL = H11 + H12
  513:                         TST2 = H22*( H11 / SCL )
  514: *
  515:                         IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
  516:      $                      MAX( SMLNUM, ULP*TST2 ) ) THEN
  517:                            H( K+1, K ) = ZERO
  518:                         END IF
  519:                      END IF
  520:                   END IF
  521:                END IF
  522: *
  523: *              ==== Accumulate orthogonal transformations. ====
  524: *
  525:                IF( ACCUM ) THEN
  526:                   KMS = K - INCOL
  527:                   T1 = V( 1, M22 )
  528:                   T2 = T1*V( 2, M22 )
  529:                   DO 50 J = MAX( 1, KTOP-INCOL ), KDU
  530:                      REFSUM = U( J, KMS+1 ) + V( 2, M22 )*U( J, KMS+2 )
  531:                      U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
  532:                      U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
  533:   50                 CONTINUE
  534:                ELSE IF( WANTZ ) THEN
  535:                   T1 = V( 1, M22 )
  536:                   T2 = T1*V( 2, M22 )
  537:                   DO 60 J = ILOZ, IHIZ
  538:                      REFSUM = Z( J, K+1 )+V( 2, M22 )*Z( J, K+2 )
  539:                      Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
  540:                      Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
  541:   60              CONTINUE
  542:                END IF
  543:             END IF
  544: *
  545: *           ==== Normal case: Chain of 3-by-3 reflections ====
  546: *
  547:             DO 80 M = MBOT, MTOP, -1
  548:                K = KRCOL + 2*( M-1 )
  549:                IF( K.EQ.KTOP-1 ) THEN
  550:                   CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
  551:      $                         SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
  552:      $                         V( 1, M ) )
  553:                   ALPHA = V( 1, M )
  554:                   CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
  555:                ELSE
  556: *
  557: *                 ==== Perform delayed transformation of row below
  558: *                 .    Mth bulge. Exploit fact that first two elements
  559: *                 .    of row are actually zero. ====
  560: *
  561:                   REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 )
  562:                   H( K+3, K   ) = -REFSUM
  563:                   H( K+3, K+1 ) = -REFSUM*V( 2, M )
  564:                   H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M )
  565: *
  566: *                 ==== Calculate reflection to move
  567: *                 .    Mth bulge one step. ====
  568: *
  569:                   BETA      = H( K+1, K )
  570:                   V( 2, M ) = H( K+2, K )
  571:                   V( 3, M ) = H( K+3, K )
  572:                   CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
  573: *
  574: *                 ==== A Bulge may collapse because of vigilant
  575: *                 .    deflation or destructive underflow.  In the
  576: *                 .    underflow case, try the two-small-subdiagonals
  577: *                 .    trick to try to reinflate the bulge.  ====
  578: *
  579:                   IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
  580:      $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
  581: *
  582: *                    ==== Typical case: not collapsed (yet). ====
  583: *
  584:                      H( K+1, K ) = BETA
  585:                      H( K+2, K ) = ZERO
  586:                      H( K+3, K ) = ZERO
  587:                   ELSE
  588: *
  589: *                    ==== Atypical case: collapsed.  Attempt to
  590: *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
  591: *                    .    If the fill resulting from the new
  592: *                    .    reflector is too large, then abandon it.
  593: *                    .    Otherwise, use the new one. ====
  594: *
  595:                      CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
  596:      $                            SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
  597:      $                            VT )
  598:                      ALPHA = VT( 1 )
  599:                      CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
  600:                      REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
  601:      $                        H( K+2, K ) )
  602: *
  603:                      IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
  604:      $                   ABS( REFSUM*VT( 3 ) ).GT.ULP*
  605:      $                   ( ABS( H( K, K ) )+ABS( H( K+1,
  606:      $                   K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
  607: *
  608: *                       ==== Starting a new bulge here would
  609: *                       .    create non-negligible fill.  Use
  610: *                       .    the old one with trepidation. ====
  611: *
  612:                         H( K+1, K ) = BETA
  613:                         H( K+2, K ) = ZERO
  614:                         H( K+3, K ) = ZERO
  615:                      ELSE
  616: *
  617: *                       ==== Starting a new bulge here would
  618: *                       .    create only negligible fill.
  619: *                       .    Replace the old reflector with
  620: *                       .    the new one. ====
  621: *
  622:                         H( K+1, K ) = H( K+1, K ) - REFSUM
  623:                         H( K+2, K ) = ZERO
  624:                         H( K+3, K ) = ZERO
  625:                         V( 1, M ) = VT( 1 )
  626:                         V( 2, M ) = VT( 2 )
  627:                         V( 3, M ) = VT( 3 )
  628:                      END IF
  629:                   END IF
  630:                END IF
  631: *
  632: *              ====  Apply reflection from the right and
  633: *              .     the first column of update from the left.
  634: *              .     These updates are required for the vigilant
  635: *              .     deflation check. We still delay most of the
  636: *              .     updates from the left for efficiency. ====      
  637: *
  638:                T1 = V( 1, M )
  639:                T2 = T1*V( 2, M )
  640:                T3 = T1*V( 3, M )
  641:                DO 70 J = JTOP, MIN( KBOT, K+3 )
  642:                   REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
  643:      $                     + V( 3, M )*H( J, K+3 )
  644:                   H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
  645:                   H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
  646:                   H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
  647:    70          CONTINUE
  648: *
  649: *              ==== Perform update from left for subsequent
  650: *              .    column. ====
  651: *
  652:                REFSUM = H( K+1, K+1 ) + V( 2, M )*H( K+2, K+1 )
  653:      $                  + V( 3, M )*H( K+3, K+1 )
  654:                H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
  655:                H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
  656:                H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
  657: *
  658: *              ==== The following convergence test requires that
  659: *              .    the tradition small-compared-to-nearby-diagonals
  660: *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
  661: *              .    criteria both be satisfied.  The latter improves
  662: *              .    accuracy in some examples. Falling back on an
  663: *              .    alternate convergence criterion when TST1 or TST2
  664: *              .    is zero (as done here) is traditional but probably
  665: *              .    unnecessary. ====
  666: *
  667:                IF( K.LT.KTOP)
  668:      $              CYCLE
  669:                IF( H( K+1, K ).NE.ZERO ) THEN
  670:                   TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
  671:                   IF( TST1.EQ.ZERO ) THEN
  672:                      IF( K.GE.KTOP+1 )
  673:      $                  TST1 = TST1 + ABS( H( K, K-1 ) )
  674:                      IF( K.GE.KTOP+2 )
  675:      $                  TST1 = TST1 + ABS( H( K, K-2 ) )
  676:                      IF( K.GE.KTOP+3 )
  677:      $                  TST1 = TST1 + ABS( H( K, K-3 ) )
  678:                      IF( K.LE.KBOT-2 )
  679:      $                  TST1 = TST1 + ABS( H( K+2, K+1 ) )
  680:                      IF( K.LE.KBOT-3 )
  681:      $                  TST1 = TST1 + ABS( H( K+3, K+1 ) )
  682:                      IF( K.LE.KBOT-4 )
  683:      $                  TST1 = TST1 + ABS( H( K+4, K+1 ) )
  684:                   END IF
  685:                   IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
  686:      $                 THEN
  687:                      H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
  688:                      H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
  689:                      H11 = MAX( ABS( H( K+1, K+1 ) ),
  690:      $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
  691:                      H22 = MIN( ABS( H( K+1, K+1 ) ),
  692:      $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
  693:                      SCL = H11 + H12
  694:                      TST2 = H22*( H11 / SCL )
  695: *
  696:                      IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
  697:      $                   MAX( SMLNUM, ULP*TST2 ) ) THEN
  698:                         H( K+1, K ) = ZERO
  699:                      END IF
  700:                   END IF
  701:                END IF
  702:    80       CONTINUE
  703: *
  704: *           ==== Multiply H by reflections from the left ====
  705: *
  706:             IF( ACCUM ) THEN
  707:                JBOT = MIN( NDCOL, KBOT )
  708:             ELSE IF( WANTT ) THEN
  709:                JBOT = N
  710:             ELSE
  711:                JBOT = KBOT
  712:             END IF
  713: *
  714:             DO 100 M = MBOT, MTOP, -1
  715:                K = KRCOL + 2*( M-1 )
  716:                T1 = V( 1, M )
  717:                T2 = T1*V( 2, M )
  718:                T3 = T1*V( 3, M )
  719:                DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
  720:                   REFSUM = H( K+1, J ) + V( 2, M )*H( K+2, J )
  721:      $                     + V( 3, M )*H( K+3, J )
  722:                   H( K+1, J ) = H( K+1, J ) - REFSUM*T1
  723:                   H( K+2, J ) = H( K+2, J ) - REFSUM*T2
  724:                   H( K+3, J ) = H( K+3, J ) - REFSUM*T3
  725:    90          CONTINUE
  726:   100       CONTINUE
  727: *
  728: *           ==== Accumulate orthogonal transformations. ====
  729: *
  730:             IF( ACCUM ) THEN
  731: *
  732: *              ==== Accumulate U. (If needed, update Z later
  733: *              .    with an efficient matrix-matrix
  734: *              .    multiply.) ====
  735: *
  736:                DO 120 M = MBOT, MTOP, -1
  737:                   K = KRCOL + 2*( M-1 )
  738:                   KMS = K - INCOL
  739:                   I2 = MAX( 1, KTOP-INCOL )
  740:                   I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
  741:                   I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
  742:                   T1 = V( 1, M )
  743:                   T2 = T1*V( 2, M )
  744:                   T3 = T1*V( 3, M )
  745:                   DO 110 J = I2, I4
  746:                      REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
  747:      $                        + V( 3, M )*U( J, KMS+3 )
  748:                      U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
  749:                      U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
  750:                      U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
  751:   110             CONTINUE
  752:   120          CONTINUE
  753:             ELSE IF( WANTZ ) THEN
  754: *
  755: *              ==== U is not accumulated, so update Z
  756: *              .    now by multiplying by reflections
  757: *              .    from the right. ====
  758: *
  759:                DO 140 M = MBOT, MTOP, -1
  760:                   K = KRCOL + 2*( M-1 )
  761:                   T1 = V( 1, M )
  762:                   T2 = T1*V( 2, M )
  763:                   T3 = T1*V( 3, M )
  764:                   DO 130 J = ILOZ, IHIZ
  765:                      REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
  766:      $                        + V( 3, M )*Z( J, K+3 )
  767:                      Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
  768:                      Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
  769:                      Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
  770:   130             CONTINUE
  771:   140          CONTINUE
  772:             END IF
  773: *
  774: *           ==== End of near-the-diagonal bulge chase. ====
  775: *
  776:   145    CONTINUE
  777: *
  778: *        ==== Use U (if accumulated) to update far-from-diagonal
  779: *        .    entries in H.  If required, use U to update Z as
  780: *        .    well. ====
  781: *
  782:          IF( ACCUM ) THEN
  783:             IF( WANTT ) THEN
  784:                JTOP = 1
  785:                JBOT = N
  786:             ELSE
  787:                JTOP = KTOP
  788:                JBOT = KBOT
  789:             END IF
  790:             K1 = MAX( 1, KTOP-INCOL )
  791:             NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
  792: *
  793: *           ==== Horizontal Multiply ====
  794: *
  795:             DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
  796:                JLEN = MIN( NH, JBOT-JCOL+1 )
  797:                CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
  798:      $                        LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
  799:      $                        LDWH )
  800:                CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
  801:      $                         H( INCOL+K1, JCOL ), LDH )
  802:   150       CONTINUE
  803: *
  804: *           ==== Vertical multiply ====
  805: *
  806:             DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
  807:                JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
  808:                CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  809:      $                     H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
  810:      $                     LDU, ZERO, WV, LDWV )
  811:                CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
  812:      $                      H( JROW, INCOL+K1 ), LDH )
  813:   160       CONTINUE
  814: *
  815: *           ==== Z multiply (also vertical) ====
  816: *
  817:             IF( WANTZ ) THEN
  818:                DO 170 JROW = ILOZ, IHIZ, NV
  819:                   JLEN = MIN( NV, IHIZ-JROW+1 )
  820:                   CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  821:      $                        Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
  822:      $                        LDU, ZERO, WV, LDWV )
  823:                   CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
  824:      $                         Z( JROW, INCOL+K1 ), LDZ )
  825:   170          CONTINUE
  826:             END IF
  827:          END IF
  828:   180 CONTINUE
  829: *
  830: *     ==== End of DLAQR5 ====
  831: *
  832:       END

CVSweb interface <joel.bertrand@systella.fr>