File:  [local] / rpl / lapack / lapack / zlaqr5.f
Revision 1.17: download - view: text, annotated - select for diffs - revision graph
Sat Aug 27 15:34:59 2016 UTC (7 years, 8 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_25, HEAD
Cohérence Lapack.

    1: *> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at 
    6: *            http://www.netlib.org/lapack/explore-html/ 
    7: *
    8: *> \htmlonly
    9: *> Download ZLAQR5 + dependencies 
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f"> 
   11: *> [TGZ]</a> 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f"> 
   13: *> [ZIP]</a> 
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f"> 
   15: *> [TXT]</a>
   16: *> \endhtmlonly 
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
   22: *                          H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
   23: *                          WV, LDWV, NH, WH, LDWH )
   24:    25: *       .. Scalar Arguments ..
   26: *       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
   27: *      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
   28: *       LOGICAL            WANTT, WANTZ
   29: *       ..
   30: *       .. Array Arguments ..
   31: *       COMPLEX*16         H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
   32: *      $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
   33: *       ..
   34: *  
   35: *
   36: *> \par Purpose:
   37: *  =============
   38: *>
   39: *> \verbatim
   40: *>
   41: *>    ZLAQR5, called by ZLAQR0, performs a
   42: *>    single small-bulge multi-shift QR sweep.
   43: *> \endverbatim
   44: *
   45: *  Arguments:
   46: *  ==========
   47: *
   48: *> \param[in] WANTT
   49: *> \verbatim
   50: *>          WANTT is logical scalar
   51: *>             WANTT = .true. if the triangular Schur factor
   52: *>             is being computed.  WANTT is set to .false. otherwise.
   53: *> \endverbatim
   54: *>
   55: *> \param[in] WANTZ
   56: *> \verbatim
   57: *>          WANTZ is logical scalar
   58: *>             WANTZ = .true. if the unitary Schur factor is being
   59: *>             computed.  WANTZ is set to .false. otherwise.
   60: *> \endverbatim
   61: *>
   62: *> \param[in] KACC22
   63: *> \verbatim
   64: *>          KACC22 is integer with value 0, 1, or 2.
   65: *>             Specifies the computation mode of far-from-diagonal
   66: *>             orthogonal updates.
   67: *>        = 0: ZLAQR5 does not accumulate reflections and does not
   68: *>             use matrix-matrix multiply to update far-from-diagonal
   69: *>             matrix entries.
   70: *>        = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
   71: *>             multiply to update the far-from-diagonal matrix entries.
   72: *>        = 2: ZLAQR5 accumulates reflections, uses matrix-matrix
   73: *>             multiply to update the far-from-diagonal matrix entries,
   74: *>             and takes advantage of 2-by-2 block structure during
   75: *>             matrix multiplies.
   76: *> \endverbatim
   77: *>
   78: *> \param[in] N
   79: *> \verbatim
   80: *>          N is integer scalar
   81: *>             N is the order of the Hessenberg matrix H upon which this
   82: *>             subroutine operates.
   83: *> \endverbatim
   84: *>
   85: *> \param[in] KTOP
   86: *> \verbatim
   87: *>          KTOP is integer scalar
   88: *> \endverbatim
   89: *>
   90: *> \param[in] KBOT
   91: *> \verbatim
   92: *>          KBOT is integer scalar
   93: *>             These are the first and last rows and columns of an
   94: *>             isolated diagonal block upon which the QR sweep is to be
   95: *>             applied. It is assumed without a check that
   96: *>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
   97: *>             and
   98: *>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
   99: *> \endverbatim
  100: *>
  101: *> \param[in] NSHFTS
  102: *> \verbatim
  103: *>          NSHFTS is integer scalar
  104: *>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
  105: *>             must be positive and even.
  106: *> \endverbatim
  107: *>
  108: *> \param[in,out] S
  109: *> \verbatim
  110: *>          S is COMPLEX*16 array of size (NSHFTS)
  111: *>             S contains the shifts of origin that define the multi-
  112: *>             shift QR sweep.  On output S may be reordered.
  113: *> \endverbatim
  114: *>
  115: *> \param[in,out] H
  116: *> \verbatim
  117: *>          H is COMPLEX*16 array of size (LDH,N)
  118: *>             On input H contains a Hessenberg matrix.  On output a
  119: *>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
  120: *>             to the isolated diagonal block in rows and columns KTOP
  121: *>             through KBOT.
  122: *> \endverbatim
  123: *>
  124: *> \param[in] LDH
  125: *> \verbatim
  126: *>          LDH is integer scalar
  127: *>             LDH is the leading dimension of H just as declared in the
  128: *>             calling procedure.  LDH.GE.MAX(1,N).
  129: *> \endverbatim
  130: *>
  131: *> \param[in] ILOZ
  132: *> \verbatim
  133: *>          ILOZ is INTEGER
  134: *> \endverbatim
  135: *>
  136: *> \param[in] IHIZ
  137: *> \verbatim
  138: *>          IHIZ is INTEGER
  139: *>             Specify the rows of Z to which transformations must be
  140: *>             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
  141: *> \endverbatim
  142: *>
  143: *> \param[in,out] Z
  144: *> \verbatim
  145: *>          Z is COMPLEX*16 array of size (LDZ,IHIZ)
  146: *>             If WANTZ = .TRUE., then the QR Sweep unitary
  147: *>             similarity transformation is accumulated into
  148: *>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
  149: *>             If WANTZ = .FALSE., then Z is unreferenced.
  150: *> \endverbatim
  151: *>
  152: *> \param[in] LDZ
  153: *> \verbatim
  154: *>          LDZ is integer scalar
  155: *>             LDA is the leading dimension of Z just as declared in
  156: *>             the calling procedure. LDZ.GE.N.
  157: *> \endverbatim
  158: *>
  159: *> \param[out] V
  160: *> \verbatim
  161: *>          V is COMPLEX*16 array of size (LDV,NSHFTS/2)
  162: *> \endverbatim
  163: *>
  164: *> \param[in] LDV
  165: *> \verbatim
  166: *>          LDV is integer scalar
  167: *>             LDV is the leading dimension of V as declared in the
  168: *>             calling procedure.  LDV.GE.3.
  169: *> \endverbatim
  170: *>
  171: *> \param[out] U
  172: *> \verbatim
  173: *>          U is COMPLEX*16 array of size
  174: *>             (LDU,3*NSHFTS-3)
  175: *> \endverbatim
  176: *>
  177: *> \param[in] LDU
  178: *> \verbatim
  179: *>          LDU is integer scalar
  180: *>             LDU is the leading dimension of U just as declared in the
  181: *>             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
  182: *> \endverbatim
  183: *>
  184: *> \param[in] NH
  185: *> \verbatim
  186: *>          NH is integer scalar
  187: *>             NH is the number of columns in array WH available for
  188: *>             workspace. NH.GE.1.
  189: *> \endverbatim
  190: *>
  191: *> \param[out] WH
  192: *> \verbatim
  193: *>          WH is COMPLEX*16 array of size (LDWH,NH)
  194: *> \endverbatim
  195: *>
  196: *> \param[in] LDWH
  197: *> \verbatim
  198: *>          LDWH is integer scalar
  199: *>             Leading dimension of WH just as declared in the
  200: *>             calling procedure.  LDWH.GE.3*NSHFTS-3.
  201: *> \endverbatim
  202: *>
  203: *> \param[in] NV
  204: *> \verbatim
  205: *>          NV is integer scalar
  206: *>             NV is the number of rows in WV agailable for workspace.
  207: *>             NV.GE.1.
  208: *> \endverbatim
  209: *>
  210: *> \param[out] WV
  211: *> \verbatim
  212: *>          WV is COMPLEX*16 array of size
  213: *>             (LDWV,3*NSHFTS-3)
  214: *> \endverbatim
  215: *>
  216: *> \param[in] LDWV
  217: *> \verbatim
  218: *>          LDWV is integer scalar
  219: *>             LDWV is the leading dimension of WV as declared in the
  220: *>             in the calling subroutine.  LDWV.GE.NV.
  221: *> \endverbatim
  222: *
  223: *  Authors:
  224: *  ========
  225: *
  226: *> \author Univ. of Tennessee 
  227: *> \author Univ. of California Berkeley 
  228: *> \author Univ. of Colorado Denver 
  229: *> \author NAG Ltd. 
  230: *
  231: *> \date June 2016
  232: *
  233: *> \ingroup complex16OTHERauxiliary
  234: *
  235: *> \par Contributors:
  236: *  ==================
  237: *>
  238: *>       Karen Braman and Ralph Byers, Department of Mathematics,
  239: *>       University of Kansas, USA
  240: *
  241: *> \par References:
  242: *  ================
  243: *>
  244: *>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
  245: *>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
  246: *>       Performance, SIAM Journal of Matrix Analysis, volume 23, pages
  247: *>       929--947, 2002.
  248: *>
  249: *  =====================================================================
  250:       SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
  251:      $                   H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
  252:      $                   WV, LDWV, NH, WH, LDWH )
  253: *
  254: *  -- LAPACK auxiliary routine (version 3.6.1) --
  255: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  256: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  257: *     June 2016
  258: *
  259: *     .. Scalar Arguments ..
  260:       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  261:      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  262:       LOGICAL            WANTT, WANTZ
  263: *     ..
  264: *     .. Array Arguments ..
  265:       COMPLEX*16         H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
  266:      $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
  267: *     ..
  268: *
  269: *  ================================================================
  270: *     .. Parameters ..
  271:       COMPLEX*16         ZERO, ONE
  272:       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
  273:      $                   ONE = ( 1.0d0, 0.0d0 ) )
  274:       DOUBLE PRECISION   RZERO, RONE
  275:       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
  276: *     ..
  277: *     .. Local Scalars ..
  278:       COMPLEX*16         ALPHA, BETA, CDUM, REFSUM
  279:       DOUBLE PRECISION   H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
  280:      $                   SMLNUM, TST1, TST2, ULP
  281:       INTEGER            I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
  282:      $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
  283:      $                   M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
  284:      $                   NS, NU
  285:       LOGICAL            ACCUM, BLK22, BMP22
  286: *     ..
  287: *     .. External Functions ..
  288:       DOUBLE PRECISION   DLAMCH
  289:       EXTERNAL           DLAMCH
  290: *     ..
  291: *     .. Intrinsic Functions ..
  292: *
  293:       INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD
  294: *     ..
  295: *     .. Local Arrays ..
  296:       COMPLEX*16         VT( 3 )
  297: *     ..
  298: *     .. External Subroutines ..
  299:       EXTERNAL           DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET,
  300:      $                   ZTRMM
  301: *     ..
  302: *     .. Statement Functions ..
  303:       DOUBLE PRECISION   CABS1
  304: *     ..
  305: *     .. Statement Function definitions ..
  306:       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
  307: *     ..
  308: *     .. Executable Statements ..
  309: *
  310: *     ==== If there are no shifts, then there is nothing to do. ====
  311: *
  312:       IF( NSHFTS.LT.2 )
  313:      $   RETURN
  314: *
  315: *     ==== If the active block is empty or 1-by-1, then there
  316: *     .    is nothing to do. ====
  317: *
  318:       IF( KTOP.GE.KBOT )
  319:      $   RETURN
  320: *
  321: *     ==== NSHFTS is supposed to be even, but if it is odd,
  322: *     .    then simply reduce it by one.  ====
  323: *
  324:       NS = NSHFTS - MOD( NSHFTS, 2 )
  325: *
  326: *     ==== Machine constants for deflation ====
  327: *
  328:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
  329:       SAFMAX = RONE / SAFMIN
  330:       CALL DLABAD( SAFMIN, SAFMAX )
  331:       ULP = DLAMCH( 'PRECISION' )
  332:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
  333: *
  334: *     ==== Use accumulated reflections to update far-from-diagonal
  335: *     .    entries ? ====
  336: *
  337:       ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
  338: *
  339: *     ==== If so, exploit the 2-by-2 block structure? ====
  340: *
  341:       BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
  342: *
  343: *     ==== clear trash ====
  344: *
  345:       IF( KTOP+2.LE.KBOT )
  346:      $   H( KTOP+2, KTOP ) = ZERO
  347: *
  348: *     ==== NBMPS = number of 2-shift bulges in the chain ====
  349: *
  350:       NBMPS = NS / 2
  351: *
  352: *     ==== KDU = width of slab ====
  353: *
  354:       KDU = 6*NBMPS - 3
  355: *
  356: *     ==== Create and chase chains of NBMPS bulges ====
  357: *
  358:       DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
  359:          NDCOL = INCOL + KDU
  360:          IF( ACCUM )
  361:      $      CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
  362: *
  363: *        ==== Near-the-diagonal bulge chase.  The following loop
  364: *        .    performs the near-the-diagonal part of a small bulge
  365: *        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal
  366: *        .    chunk extends from column INCOL to column NDCOL
  367: *        .    (including both column INCOL and column NDCOL). The
  368: *        .    following loop chases a 3*NBMPS column long chain of
  369: *        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL
  370: *        .    may be less than KTOP and and NDCOL may be greater than
  371: *        .    KBOT indicating phantom columns from which to chase
  372: *        .    bulges before they are actually introduced or to which
  373: *        .    to chase bulges beyond column KBOT.)  ====
  374: *
  375:          DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
  376: *
  377: *           ==== Bulges number MTOP to MBOT are active double implicit
  378: *           .    shift bulges.  There may or may not also be small
  379: *           .    2-by-2 bulge, if there is room.  The inactive bulges
  380: *           .    (if any) must wait until the active bulges have moved
  381: *           .    down the diagonal to make room.  The phantom matrix
  382: *           .    paradigm described above helps keep track.  ====
  383: *
  384:             MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
  385:             MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
  386:             M22 = MBOT + 1
  387:             BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
  388:      $              ( KBOT-2 )
  389: *
  390: *           ==== Generate reflections to chase the chain right
  391: *           .    one column.  (The minimum value of K is KTOP-1.) ====
  392: *
  393:             DO 10 M = MTOP, MBOT
  394:                K = KRCOL + 3*( M-1 )
  395:                IF( K.EQ.KTOP-1 ) THEN
  396:                   CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
  397:      $                         S( 2*M ), V( 1, M ) )
  398:                   ALPHA = V( 1, M )
  399:                   CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
  400:                ELSE
  401:                   BETA = H( K+1, K )
  402:                   V( 2, M ) = H( K+2, K )
  403:                   V( 3, M ) = H( K+3, K )
  404:                   CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
  405: *
  406: *                 ==== A Bulge may collapse because of vigilant
  407: *                 .    deflation or destructive underflow.  In the
  408: *                 .    underflow case, try the two-small-subdiagonals
  409: *                 .    trick to try to reinflate the bulge.  ====
  410: *
  411:                   IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
  412:      $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
  413: *
  414: *                    ==== Typical case: not collapsed (yet). ====
  415: *
  416:                      H( K+1, K ) = BETA
  417:                      H( K+2, K ) = ZERO
  418:                      H( K+3, K ) = ZERO
  419:                   ELSE
  420: *
  421: *                    ==== Atypical case: collapsed.  Attempt to
  422: *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
  423: *                    .    If the fill resulting from the new
  424: *                    .    reflector is too large, then abandon it.
  425: *                    .    Otherwise, use the new one. ====
  426: *
  427:                      CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
  428:      $                            S( 2*M ), VT )
  429:                      ALPHA = VT( 1 )
  430:                      CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
  431:                      REFSUM = DCONJG( VT( 1 ) )*
  432:      $                        ( H( K+1, K )+DCONJG( VT( 2 ) )*
  433:      $                        H( K+2, K ) )
  434: *
  435:                      IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
  436:      $                   CABS1( REFSUM*VT( 3 ) ).GT.ULP*
  437:      $                   ( CABS1( H( K, K ) )+CABS1( H( K+1,
  438:      $                   K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
  439: *
  440: *                       ==== Starting a new bulge here would
  441: *                       .    create non-negligible fill.  Use
  442: *                       .    the old one with trepidation. ====
  443: *
  444:                         H( K+1, K ) = BETA
  445:                         H( K+2, K ) = ZERO
  446:                         H( K+3, K ) = ZERO
  447:                      ELSE
  448: *
  449: *                       ==== Stating a new bulge here would
  450: *                       .    create only negligible fill.
  451: *                       .    Replace the old reflector with
  452: *                       .    the new one. ====
  453: *
  454:                         H( K+1, K ) = H( K+1, K ) - REFSUM
  455:                         H( K+2, K ) = ZERO
  456:                         H( K+3, K ) = ZERO
  457:                         V( 1, M ) = VT( 1 )
  458:                         V( 2, M ) = VT( 2 )
  459:                         V( 3, M ) = VT( 3 )
  460:                      END IF
  461:                   END IF
  462:                END IF
  463:    10       CONTINUE
  464: *
  465: *           ==== Generate a 2-by-2 reflection, if needed. ====
  466: *
  467:             K = KRCOL + 3*( M22-1 )
  468:             IF( BMP22 ) THEN
  469:                IF( K.EQ.KTOP-1 ) THEN
  470:                   CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
  471:      $                         S( 2*M22 ), V( 1, M22 ) )
  472:                   BETA = V( 1, M22 )
  473:                   CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  474:                ELSE
  475:                   BETA = H( K+1, K )
  476:                   V( 2, M22 ) = H( K+2, K )
  477:                   CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  478:                   H( K+1, K ) = BETA
  479:                   H( K+2, K ) = ZERO
  480:                END IF
  481:             END IF
  482: *
  483: *           ==== Multiply H by reflections from the left ====
  484: *
  485:             IF( ACCUM ) THEN
  486:                JBOT = MIN( NDCOL, KBOT )
  487:             ELSE IF( WANTT ) THEN
  488:                JBOT = N
  489:             ELSE
  490:                JBOT = KBOT
  491:             END IF
  492:             DO 30 J = MAX( KTOP, KRCOL ), JBOT
  493:                MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
  494:                DO 20 M = MTOP, MEND
  495:                   K = KRCOL + 3*( M-1 )
  496:                   REFSUM = DCONJG( V( 1, M ) )*
  497:      $                     ( H( K+1, J )+DCONJG( V( 2, M ) )*
  498:      $                     H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) )
  499:                   H( K+1, J ) = H( K+1, J ) - REFSUM
  500:                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
  501:                   H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
  502:    20          CONTINUE
  503:    30       CONTINUE
  504:             IF( BMP22 ) THEN
  505:                K = KRCOL + 3*( M22-1 )
  506:                DO 40 J = MAX( K+1, KTOP ), JBOT
  507:                   REFSUM = DCONJG( V( 1, M22 ) )*
  508:      $                     ( H( K+1, J )+DCONJG( V( 2, M22 ) )*
  509:      $                     H( K+2, J ) )
  510:                   H( K+1, J ) = H( K+1, J ) - REFSUM
  511:                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
  512:    40          CONTINUE
  513:             END IF
  514: *
  515: *           ==== Multiply H by reflections from the right.
  516: *           .    Delay filling in the last row until the
  517: *           .    vigilant deflation check is complete. ====
  518: *
  519:             IF( ACCUM ) THEN
  520:                JTOP = MAX( KTOP, INCOL )
  521:             ELSE IF( WANTT ) THEN
  522:                JTOP = 1
  523:             ELSE
  524:                JTOP = KTOP
  525:             END IF
  526:             DO 80 M = MTOP, MBOT
  527:                IF( V( 1, M ).NE.ZERO ) THEN
  528:                   K = KRCOL + 3*( M-1 )
  529:                   DO 50 J = JTOP, MIN( KBOT, K+3 )
  530:                      REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
  531:      $                        H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
  532:                      H( J, K+1 ) = H( J, K+1 ) - REFSUM
  533:                      H( J, K+2 ) = H( J, K+2 ) -
  534:      $                             REFSUM*DCONJG( V( 2, M ) )
  535:                      H( J, K+3 ) = H( J, K+3 ) -
  536:      $                             REFSUM*DCONJG( V( 3, M ) )
  537:    50             CONTINUE
  538: *
  539:                   IF( ACCUM ) THEN
  540: *
  541: *                    ==== Accumulate U. (If necessary, update Z later
  542: *                    .    with with an efficient matrix-matrix
  543: *                    .    multiply.) ====
  544: *
  545:                      KMS = K - INCOL
  546:                      DO 60 J = MAX( 1, KTOP-INCOL ), KDU
  547:                         REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
  548:      $                           U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
  549:                         U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
  550:                         U( J, KMS+2 ) = U( J, KMS+2 ) -
  551:      $                                  REFSUM*DCONJG( V( 2, M ) )
  552:                         U( J, KMS+3 ) = U( J, KMS+3 ) -
  553:      $                                  REFSUM*DCONJG( V( 3, M ) )
  554:    60                CONTINUE
  555:                   ELSE IF( WANTZ ) THEN
  556: *
  557: *                    ==== U is not accumulated, so update Z
  558: *                    .    now by multiplying by reflections
  559: *                    .    from the right. ====
  560: *
  561:                      DO 70 J = ILOZ, IHIZ
  562:                         REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
  563:      $                           Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
  564:                         Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
  565:                         Z( J, K+2 ) = Z( J, K+2 ) -
  566:      $                                REFSUM*DCONJG( V( 2, M ) )
  567:                         Z( J, K+3 ) = Z( J, K+3 ) -
  568:      $                                REFSUM*DCONJG( V( 3, M ) )
  569:    70                CONTINUE
  570:                   END IF
  571:                END IF
  572:    80       CONTINUE
  573: *
  574: *           ==== Special case: 2-by-2 reflection (if needed) ====
  575: *
  576:             K = KRCOL + 3*( M22-1 )
  577:             IF( BMP22 ) THEN
  578:                IF ( V( 1, M22 ).NE.ZERO ) THEN
  579:                   DO 90 J = JTOP, MIN( KBOT, K+3 )
  580:                      REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
  581:      $                        H( J, K+2 ) )
  582:                      H( J, K+1 ) = H( J, K+1 ) - REFSUM
  583:                      H( J, K+2 ) = H( J, K+2 ) -
  584:      $                             REFSUM*DCONJG( V( 2, M22 ) )
  585:    90             CONTINUE
  586: *
  587:                   IF( ACCUM ) THEN
  588:                      KMS = K - INCOL
  589:                      DO 100 J = MAX( 1, KTOP-INCOL ), KDU
  590:                         REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
  591:      $                           V( 2, M22 )*U( J, KMS+2 ) )
  592:                         U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
  593:                         U( J, KMS+2 ) = U( J, KMS+2 ) -
  594:      $                                  REFSUM*DCONJG( V( 2, M22 ) )
  595:   100                CONTINUE
  596:                   ELSE IF( WANTZ ) THEN
  597:                      DO 110 J = ILOZ, IHIZ
  598:                         REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
  599:      $                           Z( J, K+2 ) )
  600:                         Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
  601:                         Z( J, K+2 ) = Z( J, K+2 ) -
  602:      $                                REFSUM*DCONJG( V( 2, M22 ) )
  603:   110                CONTINUE
  604:                   END IF
  605:                END IF
  606:             END IF
  607: *
  608: *           ==== Vigilant deflation check ====
  609: *
  610:             MSTART = MTOP
  611:             IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
  612:      $         MSTART = MSTART + 1
  613:             MEND = MBOT
  614:             IF( BMP22 )
  615:      $         MEND = MEND + 1
  616:             IF( KRCOL.EQ.KBOT-2 )
  617:      $         MEND = MEND + 1
  618:             DO 120 M = MSTART, MEND
  619:                K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
  620: *
  621: *              ==== The following convergence test requires that
  622: *              .    the tradition small-compared-to-nearby-diagonals
  623: *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
  624: *              .    criteria both be satisfied.  The latter improves
  625: *              .    accuracy in some examples. Falling back on an
  626: *              .    alternate convergence criterion when TST1 or TST2
  627: *              .    is zero (as done here) is traditional but probably
  628: *              .    unnecessary. ====
  629: *
  630:                IF( H( K+1, K ).NE.ZERO ) THEN
  631:                   TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
  632:                   IF( TST1.EQ.RZERO ) THEN
  633:                      IF( K.GE.KTOP+1 )
  634:      $                  TST1 = TST1 + CABS1( H( K, K-1 ) )
  635:                      IF( K.GE.KTOP+2 )
  636:      $                  TST1 = TST1 + CABS1( H( K, K-2 ) )
  637:                      IF( K.GE.KTOP+3 )
  638:      $                  TST1 = TST1 + CABS1( H( K, K-3 ) )
  639:                      IF( K.LE.KBOT-2 )
  640:      $                  TST1 = TST1 + CABS1( H( K+2, K+1 ) )
  641:                      IF( K.LE.KBOT-3 )
  642:      $                  TST1 = TST1 + CABS1( H( K+3, K+1 ) )
  643:                      IF( K.LE.KBOT-4 )
  644:      $                  TST1 = TST1 + CABS1( H( K+4, K+1 ) )
  645:                   END IF
  646:                   IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
  647:      $                 THEN
  648:                      H12 = MAX( CABS1( H( K+1, K ) ),
  649:      $                     CABS1( H( K, K+1 ) ) )
  650:                      H21 = MIN( CABS1( H( K+1, K ) ),
  651:      $                     CABS1( H( K, K+1 ) ) )
  652:                      H11 = MAX( CABS1( H( K+1, K+1 ) ),
  653:      $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
  654:                      H22 = MIN( CABS1( H( K+1, K+1 ) ),
  655:      $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
  656:                      SCL = H11 + H12
  657:                      TST2 = H22*( H11 / SCL )
  658: *
  659:                      IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
  660:      $                   MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
  661:                   END IF
  662:                END IF
  663:   120       CONTINUE
  664: *
  665: *           ==== Fill in the last row of each bulge. ====
  666: *
  667:             MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
  668:             DO 130 M = MTOP, MEND
  669:                K = KRCOL + 3*( M-1 )
  670:                REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
  671:                H( K+4, K+1 ) = -REFSUM
  672:                H( K+4, K+2 ) = -REFSUM*DCONJG( V( 2, M ) )
  673:                H( K+4, K+3 ) = H( K+4, K+3 ) -
  674:      $                         REFSUM*DCONJG( V( 3, M ) )
  675:   130       CONTINUE
  676: *
  677: *           ==== End of near-the-diagonal bulge chase. ====
  678: *
  679:   140    CONTINUE
  680: *
  681: *        ==== Use U (if accumulated) to update far-from-diagonal
  682: *        .    entries in H.  If required, use U to update Z as
  683: *        .    well. ====
  684: *
  685:          IF( ACCUM ) THEN
  686:             IF( WANTT ) THEN
  687:                JTOP = 1
  688:                JBOT = N
  689:             ELSE
  690:                JTOP = KTOP
  691:                JBOT = KBOT
  692:             END IF
  693:             IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
  694:      $          ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
  695: *
  696: *              ==== Updates not exploiting the 2-by-2 block
  697: *              .    structure of U.  K1 and NU keep track of
  698: *              .    the location and size of U in the special
  699: *              .    cases of introducing bulges and chasing
  700: *              .    bulges off the bottom.  In these special
  701: *              .    cases and in case the number of shifts
  702: *              .    is NS = 2, there is no 2-by-2 block
  703: *              .    structure to exploit.  ====
  704: *
  705:                K1 = MAX( 1, KTOP-INCOL )
  706:                NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
  707: *
  708: *              ==== Horizontal Multiply ====
  709: *
  710:                DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
  711:                   JLEN = MIN( NH, JBOT-JCOL+1 )
  712:                   CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
  713:      $                        LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
  714:      $                        LDWH )
  715:                   CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH,
  716:      $                         H( INCOL+K1, JCOL ), LDH )
  717:   150          CONTINUE
  718: *
  719: *              ==== Vertical multiply ====
  720: *
  721:                DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
  722:                   JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
  723:                   CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  724:      $                        H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
  725:      $                        LDU, ZERO, WV, LDWV )
  726:                   CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
  727:      $                         H( JROW, INCOL+K1 ), LDH )
  728:   160          CONTINUE
  729: *
  730: *              ==== Z multiply (also vertical) ====
  731: *
  732:                IF( WANTZ ) THEN
  733:                   DO 170 JROW = ILOZ, IHIZ, NV
  734:                      JLEN = MIN( NV, IHIZ-JROW+1 )
  735:                      CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  736:      $                           Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
  737:      $                           LDU, ZERO, WV, LDWV )
  738:                      CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
  739:      $                            Z( JROW, INCOL+K1 ), LDZ )
  740:   170             CONTINUE
  741:                END IF
  742:             ELSE
  743: *
  744: *              ==== Updates exploiting U's 2-by-2 block structure.
  745: *              .    (I2, I4, J2, J4 are the last rows and columns
  746: *              .    of the blocks.) ====
  747: *
  748:                I2 = ( KDU+1 ) / 2
  749:                I4 = KDU
  750:                J2 = I4 - I2
  751:                J4 = KDU
  752: *
  753: *              ==== KZS and KNZ deal with the band of zeros
  754: *              .    along the diagonal of one of the triangular
  755: *              .    blocks. ====
  756: *
  757:                KZS = ( J4-J2 ) - ( NS+1 )
  758:                KNZ = NS + 1
  759: *
  760: *              ==== Horizontal multiply ====
  761: *
  762:                DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
  763:                   JLEN = MIN( NH, JBOT-JCOL+1 )
  764: *
  765: *                 ==== Copy bottom of H to top+KZS of scratch ====
  766: *                  (The first KZS rows get multiplied by zero.) ====
  767: *
  768:                   CALL ZLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
  769:      $                         LDH, WH( KZS+1, 1 ), LDWH )
  770: *
  771: *                 ==== Multiply by U21**H ====
  772: *
  773:                   CALL ZLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
  774:                   CALL ZTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
  775:      $                        U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
  776:      $                        LDWH )
  777: *
  778: *                 ==== Multiply top of H by U11**H ====
  779: *
  780:                   CALL ZGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
  781:      $                        H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
  782: *
  783: *                 ==== Copy top of H to bottom of WH ====
  784: *
  785:                   CALL ZLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
  786:      $                         WH( I2+1, 1 ), LDWH )
  787: *
  788: *                 ==== Multiply by U21**H ====
  789: *
  790:                   CALL ZTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
  791:      $                        U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
  792: *
  793: *                 ==== Multiply by U22 ====
  794: *
  795:                   CALL ZGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
  796:      $                        U( J2+1, I2+1 ), LDU,
  797:      $                        H( INCOL+1+J2, JCOL ), LDH, ONE,
  798:      $                        WH( I2+1, 1 ), LDWH )
  799: *
  800: *                 ==== Copy it back ====
  801: *
  802:                   CALL ZLACPY( 'ALL', KDU, JLEN, WH, LDWH,
  803:      $                         H( INCOL+1, JCOL ), LDH )
  804:   180          CONTINUE
  805: *
  806: *              ==== Vertical multiply ====
  807: *
  808:                DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
  809:                   JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
  810: *
  811: *                 ==== Copy right of H to scratch (the first KZS
  812: *                 .    columns get multiplied by zero) ====
  813: *
  814:                   CALL ZLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
  815:      $                         LDH, WV( 1, 1+KZS ), LDWV )
  816: *
  817: *                 ==== Multiply by U21 ====
  818: *
  819:                   CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
  820:                   CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
  821:      $                        U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
  822:      $                        LDWV )
  823: *
  824: *                 ==== Multiply by U11 ====
  825: *
  826:                   CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
  827:      $                        H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
  828:      $                        LDWV )
  829: *
  830: *                 ==== Copy left of H to right of scratch ====
  831: *
  832:                   CALL ZLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
  833:      $                         WV( 1, 1+I2 ), LDWV )
  834: *
  835: *                 ==== Multiply by U21 ====
  836: *
  837:                   CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
  838:      $                        U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
  839: *
  840: *                 ==== Multiply by U22 ====
  841: *
  842:                   CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
  843:      $                        H( JROW, INCOL+1+J2 ), LDH,
  844:      $                        U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
  845:      $                        LDWV )
  846: *
  847: *                 ==== Copy it back ====
  848: *
  849:                   CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
  850:      $                         H( JROW, INCOL+1 ), LDH )
  851:   190          CONTINUE
  852: *
  853: *              ==== Multiply Z (also vertical) ====
  854: *
  855:                IF( WANTZ ) THEN
  856:                   DO 200 JROW = ILOZ, IHIZ, NV
  857:                      JLEN = MIN( NV, IHIZ-JROW+1 )
  858: *
  859: *                    ==== Copy right of Z to left of scratch (first
  860: *                    .     KZS columns get multiplied by zero) ====
  861: *
  862:                      CALL ZLACPY( 'ALL', JLEN, KNZ,
  863:      $                            Z( JROW, INCOL+1+J2 ), LDZ,
  864:      $                            WV( 1, 1+KZS ), LDWV )
  865: *
  866: *                    ==== Multiply by U12 ====
  867: *
  868:                      CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
  869:      $                            LDWV )
  870:                      CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
  871:      $                           U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
  872:      $                           LDWV )
  873: *
  874: *                    ==== Multiply by U11 ====
  875: *
  876:                      CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
  877:      $                           Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
  878:      $                           WV, LDWV )
  879: *
  880: *                    ==== Copy left of Z to right of scratch ====
  881: *
  882:                      CALL ZLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
  883:      $                            LDZ, WV( 1, 1+I2 ), LDWV )
  884: *
  885: *                    ==== Multiply by U21 ====
  886: *
  887:                      CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
  888:      $                           U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
  889:      $                           LDWV )
  890: *
  891: *                    ==== Multiply by U22 ====
  892: *
  893:                      CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
  894:      $                           Z( JROW, INCOL+1+J2 ), LDZ,
  895:      $                           U( J2+1, I2+1 ), LDU, ONE,
  896:      $                           WV( 1, 1+I2 ), LDWV )
  897: *
  898: *                    ==== Copy the result back to Z ====
  899: *
  900:                      CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
  901:      $                            Z( JROW, INCOL+1 ), LDZ )
  902:   200             CONTINUE
  903:                END IF
  904:             END IF
  905:          END IF
  906:   210 CONTINUE
  907: *
  908: *     ==== End of ZLAQR5 ====
  909: *
  910:       END

CVSweb interface <joel.bertrand@systella.fr>