File:  [local] / rpl / lapack / lapack / zlaqr2.f
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:45 2010 UTC (14 years, 3 months ago) by bertrand
Branches: JKB
CVS tags: start, rpl-4_0_14, rpl-4_0_13, rpl-4_0_12, rpl-4_0_11, rpl-4_0_10


Commit initial.

    1:       SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
    2:      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
    3:      $                   NV, WV, LDWV, WORK, LWORK )
    4: *
    5: *  -- LAPACK auxiliary routine (version 3.2.1)                        --
    6: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
    7: *  -- April 2009                                                      --
    8: *
    9: *     .. Scalar Arguments ..
   10:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
   11:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
   12:       LOGICAL            WANTT, WANTZ
   13: *     ..
   14: *     .. Array Arguments ..
   15:       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
   16:      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
   17: *     ..
   18: *
   19: *     This subroutine is identical to ZLAQR3 except that it avoids
   20: *     recursion by calling ZLAHQR instead of ZLAQR4.
   21: *
   22: *
   23: *     ******************************************************************
   24: *     Aggressive early deflation:
   25: *
   26: *     This subroutine accepts as input an upper Hessenberg matrix
   27: *     H and performs an unitary similarity transformation
   28: *     designed to detect and deflate fully converged eigenvalues from
   29: *     a trailing principal submatrix.  On output H has been over-
   30: *     written by a new Hessenberg matrix that is a perturbation of
   31: *     an unitary similarity transformation of H.  It is to be
   32: *     hoped that the final version of H has many zero subdiagonal
   33: *     entries.
   34: *
   35: *     ******************************************************************
   36: *     WANTT   (input) LOGICAL
   37: *          If .TRUE., then the Hessenberg matrix H is fully updated
   38: *          so that the triangular Schur factor may be
   39: *          computed (in cooperation with the calling subroutine).
   40: *          If .FALSE., then only enough of H is updated to preserve
   41: *          the eigenvalues.
   42: *
   43: *     WANTZ   (input) LOGICAL
   44: *          If .TRUE., then the unitary matrix Z is updated so
   45: *          so that the unitary Schur factor may be computed
   46: *          (in cooperation with the calling subroutine).
   47: *          If .FALSE., then Z is not referenced.
   48: *
   49: *     N       (input) INTEGER
   50: *          The order of the matrix H and (if WANTZ is .TRUE.) the
   51: *          order of the unitary matrix Z.
   52: *
   53: *     KTOP    (input) INTEGER
   54: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
   55: *          KBOT and KTOP together determine an isolated block
   56: *          along the diagonal of the Hessenberg matrix.
   57: *
   58: *     KBOT    (input) INTEGER
   59: *          It is assumed without a check that either
   60: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
   61: *          determine an isolated block along the diagonal of the
   62: *          Hessenberg matrix.
   63: *
   64: *     NW      (input) INTEGER
   65: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
   66: *
   67: *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
   68: *          On input the initial N-by-N section of H stores the
   69: *          Hessenberg matrix undergoing aggressive early deflation.
   70: *          On output H has been transformed by a unitary
   71: *          similarity transformation, perturbed, and the returned
   72: *          to Hessenberg form that (it is to be hoped) has some
   73: *          zero subdiagonal entries.
   74: *
   75: *     LDH     (input) integer
   76: *          Leading dimension of H just as declared in the calling
   77: *          subroutine.  N .LE. LDH
   78: *
   79: *     ILOZ    (input) INTEGER
   80: *     IHIZ    (input) INTEGER
   81: *          Specify the rows of Z to which transformations must be
   82: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
   83: *
   84: *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
   85: *          IF WANTZ is .TRUE., then on output, the unitary
   86: *          similarity transformation mentioned above has been
   87: *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
   88: *          If WANTZ is .FALSE., then Z is unreferenced.
   89: *
   90: *     LDZ     (input) integer
   91: *          The leading dimension of Z just as declared in the
   92: *          calling subroutine.  1 .LE. LDZ.
   93: *
   94: *     NS      (output) integer
   95: *          The number of unconverged (ie approximate) eigenvalues
   96: *          returned in SR and SI that may be used as shifts by the
   97: *          calling subroutine.
   98: *
   99: *     ND      (output) integer
  100: *          The number of converged eigenvalues uncovered by this
  101: *          subroutine.
  102: *
  103: *     SH      (output) COMPLEX*16 array, dimension KBOT
  104: *          On output, approximate eigenvalues that may
  105: *          be used for shifts are stored in SH(KBOT-ND-NS+1)
  106: *          through SR(KBOT-ND).  Converged eigenvalues are
  107: *          stored in SH(KBOT-ND+1) through SH(KBOT).
  108: *
  109: *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
  110: *          An NW-by-NW work array.
  111: *
  112: *     LDV     (input) integer scalar
  113: *          The leading dimension of V just as declared in the
  114: *          calling subroutine.  NW .LE. LDV
  115: *
  116: *     NH      (input) integer scalar
  117: *          The number of columns of T.  NH.GE.NW.
  118: *
  119: *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
  120: *
  121: *     LDT     (input) integer
  122: *          The leading dimension of T just as declared in the
  123: *          calling subroutine.  NW .LE. LDT
  124: *
  125: *     NV      (input) integer
  126: *          The number of rows of work array WV available for
  127: *          workspace.  NV.GE.NW.
  128: *
  129: *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
  130: *
  131: *     LDWV    (input) integer
  132: *          The leading dimension of W just as declared in the
  133: *          calling subroutine.  NW .LE. LDV
  134: *
  135: *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
  136: *          On exit, WORK(1) is set to an estimate of the optimal value
  137: *          of LWORK for the given values of N, NW, KTOP and KBOT.
  138: *
  139: *     LWORK   (input) integer
  140: *          The dimension of the work array WORK.  LWORK = 2*NW
  141: *          suffices, but greater efficiency may result from larger
  142: *          values of LWORK.
  143: *
  144: *          If LWORK = -1, then a workspace query is assumed; ZLAQR2
  145: *          only estimates the optimal workspace size for the given
  146: *          values of N, NW, KTOP and KBOT.  The estimate is returned
  147: *          in WORK(1).  No error message related to LWORK is issued
  148: *          by XERBLA.  Neither H nor Z are accessed.
  149: *
  150: *     ================================================================
  151: *     Based on contributions by
  152: *        Karen Braman and Ralph Byers, Department of Mathematics,
  153: *        University of Kansas, USA
  154: *
  155: *     ================================================================
  156: *     .. Parameters ..
  157:       COMPLEX*16         ZERO, ONE
  158:       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
  159:      $                   ONE = ( 1.0d0, 0.0d0 ) )
  160:       DOUBLE PRECISION   RZERO, RONE
  161:       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
  162: *     ..
  163: *     .. Local Scalars ..
  164:       COMPLEX*16         BETA, CDUM, S, TAU
  165:       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
  166:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
  167:      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
  168: *     ..
  169: *     .. External Functions ..
  170:       DOUBLE PRECISION   DLAMCH
  171:       EXTERNAL           DLAMCH
  172: *     ..
  173: *     .. External Subroutines ..
  174:       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
  175:      $                   ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
  176: *     ..
  177: *     .. Intrinsic Functions ..
  178:       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
  179: *     ..
  180: *     .. Statement Functions ..
  181:       DOUBLE PRECISION   CABS1
  182: *     ..
  183: *     .. Statement Function definitions ..
  184:       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
  185: *     ..
  186: *     .. Executable Statements ..
  187: *
  188: *     ==== Estimate optimal workspace. ====
  189: *
  190:       JW = MIN( NW, KBOT-KTOP+1 )
  191:       IF( JW.LE.2 ) THEN
  192:          LWKOPT = 1
  193:       ELSE
  194: *
  195: *        ==== Workspace query call to ZGEHRD ====
  196: *
  197:          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
  198:          LWK1 = INT( WORK( 1 ) )
  199: *
  200: *        ==== Workspace query call to ZUNMHR ====
  201: *
  202:          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
  203:      $                WORK, -1, INFO )
  204:          LWK2 = INT( WORK( 1 ) )
  205: *
  206: *        ==== Optimal workspace ====
  207: *
  208:          LWKOPT = JW + MAX( LWK1, LWK2 )
  209:       END IF
  210: *
  211: *     ==== Quick return in case of workspace query. ====
  212: *
  213:       IF( LWORK.EQ.-1 ) THEN
  214:          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
  215:          RETURN
  216:       END IF
  217: *
  218: *     ==== Nothing to do ...
  219: *     ... for an empty active block ... ====
  220:       NS = 0
  221:       ND = 0
  222:       WORK( 1 ) = ONE
  223:       IF( KTOP.GT.KBOT )
  224:      $   RETURN
  225: *     ... nor for an empty deflation window. ====
  226:       IF( NW.LT.1 )
  227:      $   RETURN
  228: *
  229: *     ==== Machine constants ====
  230: *
  231:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
  232:       SAFMAX = RONE / SAFMIN
  233:       CALL DLABAD( SAFMIN, SAFMAX )
  234:       ULP = DLAMCH( 'PRECISION' )
  235:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
  236: *
  237: *     ==== Setup deflation window ====
  238: *
  239:       JW = MIN( NW, KBOT-KTOP+1 )
  240:       KWTOP = KBOT - JW + 1
  241:       IF( KWTOP.EQ.KTOP ) THEN
  242:          S = ZERO
  243:       ELSE
  244:          S = H( KWTOP, KWTOP-1 )
  245:       END IF
  246: *
  247:       IF( KBOT.EQ.KWTOP ) THEN
  248: *
  249: *        ==== 1-by-1 deflation window: not much to do ====
  250: *
  251:          SH( KWTOP ) = H( KWTOP, KWTOP )
  252:          NS = 1
  253:          ND = 0
  254:          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
  255:      $       KWTOP ) ) ) ) THEN
  256:             NS = 0
  257:             ND = 1
  258:             IF( KWTOP.GT.KTOP )
  259:      $         H( KWTOP, KWTOP-1 ) = ZERO
  260:          END IF
  261:          WORK( 1 ) = ONE
  262:          RETURN
  263:       END IF
  264: *
  265: *     ==== Convert to spike-triangular form.  (In case of a
  266: *     .    rare QR failure, this routine continues to do
  267: *     .    aggressive early deflation using that part of
  268: *     .    the deflation window that converged using INFQR
  269: *     .    here and there to keep track.) ====
  270: *
  271:       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
  272:       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
  273: *
  274:       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
  275:       CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
  276:      $             JW, V, LDV, INFQR )
  277: *
  278: *     ==== Deflation detection loop ====
  279: *
  280:       NS = JW
  281:       ILST = INFQR + 1
  282:       DO 10 KNT = INFQR + 1, JW
  283: *
  284: *        ==== Small spike tip deflation test ====
  285: *
  286:          FOO = CABS1( T( NS, NS ) )
  287:          IF( FOO.EQ.RZERO )
  288:      $      FOO = CABS1( S )
  289:          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
  290:      $        THEN
  291: *
  292: *           ==== One more converged eigenvalue ====
  293: *
  294:             NS = NS - 1
  295:          ELSE
  296: *
  297: *           ==== One undeflatable eigenvalue.  Move it up out of the
  298: *           .    way.   (ZTREXC can not fail in this case.) ====
  299: *
  300:             IFST = NS
  301:             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
  302:             ILST = ILST + 1
  303:          END IF
  304:    10 CONTINUE
  305: *
  306: *        ==== Return to Hessenberg form ====
  307: *
  308:       IF( NS.EQ.0 )
  309:      $   S = ZERO
  310: *
  311:       IF( NS.LT.JW ) THEN
  312: *
  313: *        ==== sorting the diagonal of T improves accuracy for
  314: *        .    graded matrices.  ====
  315: *
  316:          DO 30 I = INFQR + 1, NS
  317:             IFST = I
  318:             DO 20 J = I + 1, NS
  319:                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
  320:      $            IFST = J
  321:    20       CONTINUE
  322:             ILST = I
  323:             IF( IFST.NE.ILST )
  324:      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
  325:    30    CONTINUE
  326:       END IF
  327: *
  328: *     ==== Restore shift/eigenvalue array from T ====
  329: *
  330:       DO 40 I = INFQR + 1, JW
  331:          SH( KWTOP+I-1 ) = T( I, I )
  332:    40 CONTINUE
  333: *
  334: *
  335:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
  336:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
  337: *
  338: *           ==== Reflect spike back into lower triangle ====
  339: *
  340:             CALL ZCOPY( NS, V, LDV, WORK, 1 )
  341:             DO 50 I = 1, NS
  342:                WORK( I ) = DCONJG( WORK( I ) )
  343:    50       CONTINUE
  344:             BETA = WORK( 1 )
  345:             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
  346:             WORK( 1 ) = ONE
  347: *
  348:             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
  349: *
  350:             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
  351:      $                  WORK( JW+1 ) )
  352:             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
  353:      $                  WORK( JW+1 ) )
  354:             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
  355:      $                  WORK( JW+1 ) )
  356: *
  357:             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
  358:      $                   LWORK-JW, INFO )
  359:          END IF
  360: *
  361: *        ==== Copy updated reduced window into place ====
  362: *
  363:          IF( KWTOP.GT.1 )
  364:      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
  365:          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
  366:          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
  367:      $               LDH+1 )
  368: *
  369: *        ==== Accumulate orthogonal matrix in order update
  370: *        .    H and Z, if requested.  ====
  371: *
  372:          IF( NS.GT.1 .AND. S.NE.ZERO )
  373:      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
  374:      $                   WORK( JW+1 ), LWORK-JW, INFO )
  375: *
  376: *        ==== Update vertical slab in H ====
  377: *
  378:          IF( WANTT ) THEN
  379:             LTOP = 1
  380:          ELSE
  381:             LTOP = KTOP
  382:          END IF
  383:          DO 60 KROW = LTOP, KWTOP - 1, NV
  384:             KLN = MIN( NV, KWTOP-KROW )
  385:             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
  386:      $                  LDH, V, LDV, ZERO, WV, LDWV )
  387:             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
  388:    60    CONTINUE
  389: *
  390: *        ==== Update horizontal slab in H ====
  391: *
  392:          IF( WANTT ) THEN
  393:             DO 70 KCOL = KBOT + 1, N, NH
  394:                KLN = MIN( NH, N-KCOL+1 )
  395:                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
  396:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
  397:                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
  398:      $                      LDH )
  399:    70       CONTINUE
  400:          END IF
  401: *
  402: *        ==== Update vertical slab in Z ====
  403: *
  404:          IF( WANTZ ) THEN
  405:             DO 80 KROW = ILOZ, IHIZ, NV
  406:                KLN = MIN( NV, IHIZ-KROW+1 )
  407:                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
  408:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
  409:                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
  410:      $                      LDZ )
  411:    80       CONTINUE
  412:          END IF
  413:       END IF
  414: *
  415: *     ==== Return the number of deflations ... ====
  416: *
  417:       ND = JW - NS
  418: *
  419: *     ==== ... and the number of shifts. (Subtracting
  420: *     .    INFQR from the spike length takes care
  421: *     .    of the case of a rare QR failure while
  422: *     .    calculating eigenvalues of the deflation
  423: *     .    window.)  ====
  424: *
  425:       NS = NS - INFQR
  426: *
  427: *      ==== Return optimal workspace. ====
  428: *
  429:       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
  430: *
  431: *     ==== End of ZLAQR2 ====
  432: *
  433:       END

CVSweb interface <joel.bertrand@systella.fr>