rpl/lapack/lapack/zlaqr3.f - view

Return to zlaqr3.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / zlaqr3.f
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:45 2010 UTC (14 years, 4 months ago) by bertrand
Branches: JKB
CVS tags: start, rpl-4_0_14, rpl-4_0_13, rpl-4_0_12, rpl-4_0_11, rpl-4_0_10



Commit initial.

    1:       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
    2:      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
    3:      $                   NV, WV, LDWV, WORK, LWORK )
    4: *
    5: *  -- LAPACK auxiliary routine (version 3.2.1)                        --
    6: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
    7: *  -- April 2009                                                      --
    8: *
    9: *     .. Scalar Arguments ..
   10:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
   11:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
   12:       LOGICAL            WANTT, WANTZ
   13: *     ..
   14: *     .. Array Arguments ..
   15:       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
   16:      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
   17: *     ..
   18: *
   19: *     ******************************************************************
   20: *     Aggressive early deflation:
   21: *
   22: *     This subroutine accepts as input an upper Hessenberg matrix
   23: *     H and performs a unitary similarity transformation
   24: *     designed to detect and deflate fully converged eigenvalues from
   25: *     a trailing principal submatrix.  On output H has been over-
   26: *     written by a new Hessenberg matrix that is a perturbation of
   27: *     a unitary similarity transformation of H.  It is to be
   28: *     hoped that the final version of H has many zero subdiagonal
   29: *     entries.
   30: *
   31: *     ******************************************************************
   32: *     WANTT   (input) LOGICAL
   33: *          If .TRUE., then the Hessenberg matrix H is fully updated
   34: *          so that the triangular Schur factor may be
   35: *          computed (in cooperation with the calling subroutine).
   36: *          If .FALSE., then only enough of H is updated to preserve
   37: *          the eigenvalues.
   38: *
   39: *     WANTZ   (input) LOGICAL
   40: *          If .TRUE., then the unitary matrix Z is updated
   41: *          so that the unitary Schur factor may be computed
   42: *          (in cooperation with the calling subroutine).
   43: *          If .FALSE., then Z is not referenced.
   44: *
   45: *     N       (input) INTEGER
   46: *          The order of the matrix H and (if WANTZ is .TRUE.) the
   47: *          order of the unitary matrix Z.
   48: *
   49: *     KTOP    (input) INTEGER
   50: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
   51: *          KBOT and KTOP together determine an isolated block
   52: *          along the diagonal of the Hessenberg matrix.
   53: *
   54: *     KBOT    (input) INTEGER
   55: *          It is assumed without a check that either
   56: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
   57: *          determine an isolated block along the diagonal of the
   58: *          Hessenberg matrix.
   59: *
   60: *     NW      (input) INTEGER
   61: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
   62: *
   63: *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
   64: *          On input the initial N-by-N section of H stores the
   65: *          Hessenberg matrix undergoing aggressive early deflation.
   66: *          On output H has been transformed by a unitary
   67: *          similarity transformation, perturbed, and then returned
   68: *          to Hessenberg form that (it is to be hoped) has some
   69: *          zero subdiagonal entries.
   70: *
   71: *     LDH     (input) integer
   72: *          Leading dimension of H just as declared in the calling
   73: *          subroutine.  N .LE. LDH
   74: *
   75: *     ILOZ    (input) INTEGER
   76: *     IHIZ    (input) INTEGER
   77: *          Specify the rows of Z to which transformations must be
   78: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
   79: *
   80: *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
   81: *          If WANTZ is .TRUE., then on output the unitary similarity
   82: *          transformation mentioned above has been accumulated from
   83: *          the right into Z(ILOZ:IHIZ,KBOT-JW+1:KBOT), where JW is
   84: *          MIN(NW,KBOT-KTOP+1).  If .FALSE., Z is unreferenced.
   85: *
   86: *     LDZ     (input) integer
   87: *          The leading dimension of Z just as declared in the
   88: *          calling subroutine.  1 .LE. LDZ.
   89: *
   90: *     NS      (output) integer
   91: *          The number of unconverged (i.e., approximate) eigenvalues
   92: *          returned in SH that may be used as shifts by the
   93: *          calling subroutine.
   94: *
   95: *     ND      (output) integer
   96: *          The number of converged eigenvalues uncovered by this
   97: *          subroutine.
   98: *
   99: *     SH      (output) COMPLEX*16 array, dimension KBOT
  100: *          On output, approximate eigenvalues that may
  101: *          be used for shifts are stored in SH(KBOT-ND-NS+1)
  102: *          through SH(KBOT-ND).  Converged eigenvalues are
  103: *          stored in SH(KBOT-ND+1) through SH(KBOT).
  104: *
  105: *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
  106: *          An NW-by-NW work array.
  107: *
  108: *     LDV     (input) integer scalar
  109: *          The leading dimension of V just as declared in the
  110: *          calling subroutine.  NW .LE. LDV
  111: *
  112: *     NH      (input) integer scalar
  113: *          The number of columns of T.  NH.GE.NW.
  114: *
  115: *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
  116: *
  117: *     LDT     (input) integer
  118: *          The leading dimension of T just as declared in the
  119: *          calling subroutine.  NW .LE. LDT
  120: *
  121: *     NV      (input) integer
  122: *          The number of rows of work array WV available for
  123: *          workspace.  NV.GE.NW.
  124: *
  125: *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
  126: *
  127: *     LDWV    (input) integer
  128: *          The leading dimension of WV just as declared in the
  129: *          calling subroutine.  NW .LE. LDWV
  130: *
  131: *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
  132: *          On exit, WORK(1) is set to an estimate of the optimal value
  133: *          of LWORK for the given values of N, NW, KTOP and KBOT.
  134: *
  135: *     LWORK   (input) integer
  136: *          The dimension of the work array WORK.  LWORK = 2*NW
  137: *          suffices, but greater efficiency may result from larger
  138: *          values of LWORK.
  139: *
  140: *          If LWORK = -1, then a workspace query is assumed; ZLAQR3
  141: *          only estimates the optimal workspace size for the given
  142: *          values of N, NW, KTOP and KBOT.  The estimate is returned
  143: *          in WORK(1).  No error message related to LWORK is issued
  144: *          by XERBLA.  Neither H nor Z are accessed.
  145: *
  146: *     ================================================================
  147: *     Based on contributions by
  148: *        Karen Braman and Ralph Byers, Department of Mathematics,
  149: *        University of Kansas, USA
  150: *
  151: *     ================================================================
  152: *     .. Parameters ..
  153:       COMPLEX*16         ZERO, ONE
  154:       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
  155:      $                   ONE = ( 1.0d0, 0.0d0 ) )
  156:       DOUBLE PRECISION   RZERO, RONE
  157:       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
  158: *     ..
  159: *     .. Local Scalars ..
  160:       COMPLEX*16         BETA, CDUM, S, TAU
  161:       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
  162:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
  163:      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
  164:      $                   LWKOPT, NMIN
  165: *     ..
  166: *     .. External Functions ..
  167:       DOUBLE PRECISION   DLAMCH
  168:       INTEGER            ILAENV
  169:       EXTERNAL           DLAMCH, ILAENV
  170: *     ..
  171: *     .. External Subroutines ..
  172:       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
  173:      $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
  174: *     ..
  175: *     .. Intrinsic Functions ..
  176:       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
  177: *     ..
  178: *     .. Statement Functions ..
  179:       DOUBLE PRECISION   CABS1
  180: *     ..
  181: *     .. Statement Function definitions ..
  182:       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
  183: *     ..
  184: *     .. Executable Statements ..
  185: *
  186: *     ==== Estimate optimal workspace. ====
  187: *
  188:       JW = MIN( NW, KBOT-KTOP+1 )
  189:       IF( JW.LE.2 ) THEN
  190:          LWKOPT = 1
  191:       ELSE
  192: *
  193: *        ==== Workspace query call to ZGEHRD ====
  194: *
  195:          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
  196:          LWK1 = INT( WORK( 1 ) )
  197: *
  198: *        ==== Workspace query call to ZUNMHR ====
  199: *
  200:          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
  201:      $                WORK, -1, INFO )
  202:          LWK2 = INT( WORK( 1 ) )
  203: *
  204: *        ==== Workspace query call to ZLAQR4 ====
  205: *
  206:          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,
  207:      $                LDV, WORK, -1, INFQR )
  208:          LWK3 = INT( WORK( 1 ) )
  209: *
  210: *        ==== Optimal workspace ====
  211: *
  212:          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
  213:       END IF
  214: *
  215: *     ==== Quick return in case of workspace query. ====
  216: *
  217:       IF( LWORK.EQ.-1 ) THEN
  218:          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
  219:          RETURN
  220:       END IF
  221: *
  222: *     ==== Nothing to do ...
  223: *     ... for an empty active block ... ====
  224:       NS = 0
  225:       ND = 0
  226:       WORK( 1 ) = ONE
  227:       IF( KTOP.GT.KBOT )
  228:      $   RETURN
  229: *     ... nor for an empty deflation window. ====
  230:       IF( NW.LT.1 )
  231:      $   RETURN
  232: *
  233: *     ==== Machine constants ====
  234: *
  235:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
  236:       SAFMAX = RONE / SAFMIN
  237:       CALL DLABAD( SAFMIN, SAFMAX )
  238:       ULP = DLAMCH( 'PRECISION' )
  239:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
  240: *
  241: *     ==== Setup deflation window ====
  242: *
  243:       JW = MIN( NW, KBOT-KTOP+1 )
  244:       KWTOP = KBOT - JW + 1
  245:       IF( KWTOP.EQ.KTOP ) THEN
  246:          S = ZERO
  247:       ELSE
  248:          S = H( KWTOP, KWTOP-1 )
  249:       END IF
  250: *
  251:       IF( KBOT.EQ.KWTOP ) THEN
  252: *
  253: *        ==== 1-by-1 deflation window: not much to do ====
  254: *
  255:          SH( KWTOP ) = H( KWTOP, KWTOP )
  256:          NS = 1
  257:          ND = 0
  258:          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
  259:      $       KWTOP ) ) ) ) THEN
  260:             NS = 0
  261:             ND = 1
  262:             IF( KWTOP.GT.KTOP )
  263:      $         H( KWTOP, KWTOP-1 ) = ZERO
  264:          END IF
  265:          WORK( 1 ) = ONE
  266:          RETURN
  267:       END IF
  268: *
  269: *     ==== Convert to spike-triangular form.  (In case of a
  270: *     .    rare QR failure, this routine continues to do
  271: *     .    aggressive early deflation using that part of
  272: *     .    the deflation window that converged using INFQR
  273: *     .    here and there to keep track.) ====
  274: *
  275:       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
  276:       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
  277: *
  278:       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
  279:       NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )
  280:       IF( JW.GT.NMIN ) THEN
  281:          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
  282:      $                JW, V, LDV, WORK, LWORK, INFQR )
  283:       ELSE
  284:          CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
  285:      $                JW, V, LDV, INFQR )
  286:       END IF
  287: *
  288: *     ==== Deflation detection loop ====
  289: *
  290:       NS = JW
  291:       ILST = INFQR + 1
  292:       DO 10 KNT = INFQR + 1, JW
  293: *
  294: *        ==== Small spike tip deflation test ====
  295: *
  296:          FOO = CABS1( T( NS, NS ) )
  297:          IF( FOO.EQ.RZERO )
  298:      $      FOO = CABS1( S )
  299:          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
  300:      $        THEN
  301: *
  302: *           ==== One more converged eigenvalue ====
  303: *
  304:             NS = NS - 1
  305:          ELSE
  306: *
  307: *           ==== One undeflatable eigenvalue.  Move it up out of the
  308: *           .    way.   (ZTREXC can not fail in this case.) ====
  309: *
  310:             IFST = NS
  311:             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
  312:             ILST = ILST + 1
  313:          END IF
  314:    10 CONTINUE
  315: *
  316: *     ==== Return to Hessenberg form ====
  317: *
  318:       IF( NS.EQ.0 )
  319:      $   S = ZERO
  320: *
  321:       IF( NS.LT.JW ) THEN
  322: *
  323: *        ==== sorting the diagonal of T improves accuracy for
  324: *        .    graded matrices.  ====
  325: *
  326:          DO 30 I = INFQR + 1, NS
  327:             IFST = I
  328:             DO 20 J = I + 1, NS
  329:                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
  330:      $            IFST = J
  331:    20       CONTINUE
  332:             ILST = I
  333:             IF( IFST.NE.ILST )
  334:      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
  335:    30    CONTINUE
  336:       END IF
  337: *
  338: *     ==== Restore shift/eigenvalue array from T ====
  339: *
  340:       DO 40 I = INFQR + 1, JW
  341:          SH( KWTOP+I-1 ) = T( I, I )
  342:    40 CONTINUE
  343: *
  344: *     ==== Update H and Z only if an eigenvalue deflated or S = 0 ====
  345:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
  346:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
  347: *
  348: *           ==== Reflect spike back into lower triangle ====
  349: *
  350:             CALL ZCOPY( NS, V, LDV, WORK, 1 )
  351:             DO 50 I = 1, NS
  352:                WORK( I ) = DCONJG( WORK( I ) )
  353:    50       CONTINUE
  354:             BETA = WORK( 1 )
  355:             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
  356:             WORK( 1 ) = ONE
  357: *           ==== Zero T below its first subdiagonal ====
  358:             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
  359: *
  360:             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
  361:      $                  WORK( JW+1 ) )
  362:             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
  363:      $                  WORK( JW+1 ) )
  364:             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
  365:      $                  WORK( JW+1 ) )
  366: *           ==== Hessenberg reduction of T with ILO = 1, IHI = NS ====
  367:             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
  368:      $                   LWORK-JW, INFO )
  369:          END IF
  370: *
  371: *        ==== Copy updated reduced window into place ====
  372: *
  373:          IF( KWTOP.GT.1 )
  374:      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
  375:          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
  376:          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
  377:      $               LDH+1 )
  378: *
  379: *        ==== Accumulate unitary matrix in order to update
  380: *        .    H and Z, if requested.  ====
  381: *
  382:          IF( NS.GT.1 .AND. S.NE.ZERO )
  383:      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
  384:      $                   WORK( JW+1 ), LWORK-JW, INFO )
  385: *
  386: *        ==== Update vertical slab in H ====
  387: *
  388:          IF( WANTT ) THEN
  389:             LTOP = 1
  390:          ELSE
  391:             LTOP = KTOP
  392:          END IF
  393:          DO 60 KROW = LTOP, KWTOP - 1, NV
  394:             KLN = MIN( NV, KWTOP-KROW )
  395:             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
  396:      $                  LDH, V, LDV, ZERO, WV, LDWV )
  397:             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
  398:    60    CONTINUE
  399: *
  400: *        ==== Update horizontal slab in H ====
  401: *
  402:          IF( WANTT ) THEN
  403:             DO 70 KCOL = KBOT + 1, N, NH
  404:                KLN = MIN( NH, N-KCOL+1 )
  405:                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
  406:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
  407:                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
  408:      $                      LDH )
  409:    70       CONTINUE
  410:          END IF
  411: *
  412: *        ==== Update vertical slab in Z ====
  413: *
  414:          IF( WANTZ ) THEN
  415:             DO 80 KROW = ILOZ, IHIZ, NV
  416:                KLN = MIN( NV, IHIZ-KROW+1 )
  417:                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
  418:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
  419:                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
  420:      $                      LDZ )
  421:    80       CONTINUE
  422:          END IF
  423:       END IF
  424: *
  425: *     ==== Return the number of deflations ... ====
  426: *
  427:       ND = JW - NS
  428: *
  429: *     ==== ... and the number of shifts. (Subtracting
  430: *     .    INFQR from the spike length takes care
  431: *     .    of the case of a rare QR failure while
  432: *     .    calculating eigenvalues of the deflation
  433: *     .    window.)  ====
  434: *
  435:       NS = NS - INFQR
  436: *
  437: *     ==== Return optimal workspace. ====
  438: *
  439:       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
  440: *
  441: *     ==== End of ZLAQR3 ====
  442: *
  443:       END

CVSweb interface <joel.bertrand@systella.fr>