Annotation of rpl/lapack/lapack/zlaqr3.f, revision 1.3

1.1       bertrand    1:       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
                      2:      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
                      3:      $                   NV, WV, LDWV, WORK, LWORK )
                      4: *
                      5: *  -- LAPACK auxiliary routine (version 3.2.1)                        --
                      6: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
                      7: *  -- April 2009                                                      --
                      8: *
                      9: *     .. Scalar Arguments ..
                     10:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
                     11:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
                     12:       LOGICAL            WANTT, WANTZ
                     13: *     ..
                     14: *     .. Array Arguments ..
                     15:       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
                     16:      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
                     17: *     ..
                     18: *
                     19: *     ******************************************************************
                     20: *     Aggressive early deflation:
                     21: *
                     22: *     This subroutine accepts as input an upper Hessenberg matrix
                     23: *     H and performs an unitary similarity transformation
                     24: *     designed to detect and deflate fully converged eigenvalues from
                     25: *     a trailing principal submatrix.  On output H has been over-
                     26: *     written by a new Hessenberg matrix that is a perturbation of
                     27: *     an unitary similarity transformation of H.  It is to be
                     28: *     hoped that the final version of H has many zero subdiagonal
                     29: *     entries.
                     30: *
                     31: *     ******************************************************************
                     32: *     WANTT   (input) LOGICAL
                     33: *          If .TRUE., then the Hessenberg matrix H is fully updated
                     34: *          so that the triangular Schur factor may be
                     35: *          computed (in cooperation with the calling subroutine).
                     36: *          If .FALSE., then only enough of H is updated to preserve
                     37: *          the eigenvalues.
                     38: *
                     39: *     WANTZ   (input) LOGICAL
                     40: *          If .TRUE., then the unitary matrix Z is updated so
                     41: *          so that the unitary Schur factor may be computed
                     42: *          (in cooperation with the calling subroutine).
                     43: *          If .FALSE., then Z is not referenced.
                     44: *
                     45: *     N       (input) INTEGER
                     46: *          The order of the matrix H and (if WANTZ is .TRUE.) the
                     47: *          order of the unitary matrix Z.
                     48: *
                     49: *     KTOP    (input) INTEGER
                     50: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
                     51: *          KBOT and KTOP together determine an isolated block
                     52: *          along the diagonal of the Hessenberg matrix.
                     53: *
                     54: *     KBOT    (input) INTEGER
                     55: *          It is assumed without a check that either
                     56: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
                     57: *          determine an isolated block along the diagonal of the
                     58: *          Hessenberg matrix.
                     59: *
                     60: *     NW      (input) INTEGER
                     61: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
                     62: *
                     63: *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
                     64: *          On input the initial N-by-N section of H stores the
                     65: *          Hessenberg matrix undergoing aggressive early deflation.
                     66: *          On output H has been transformed by a unitary
                     67: *          similarity transformation, perturbed, and the returned
                     68: *          to Hessenberg form that (it is to be hoped) has some
                     69: *          zero subdiagonal entries.
                     70: *
                     71: *     LDH     (input) integer
                     72: *          Leading dimension of H just as declared in the calling
                     73: *          subroutine.  N .LE. LDH
                     74: *
                     75: *     ILOZ    (input) INTEGER
                     76: *     IHIZ    (input) INTEGER
                     77: *          Specify the rows of Z to which transformations must be
                     78: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
                     79: *
                     80: *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
                     81: *          IF WANTZ is .TRUE., then on output, the unitary
                     82: *          similarity transformation mentioned above has been
                     83: *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
                     84: *          If WANTZ is .FALSE., then Z is unreferenced.
                     85: *
                     86: *     LDZ     (input) integer
                     87: *          The leading dimension of Z just as declared in the
                     88: *          calling subroutine.  1 .LE. LDZ.
                     89: *
                     90: *     NS      (output) integer
                     91: *          The number of unconverged (ie approximate) eigenvalues
                     92: *          returned in SR and SI that may be used as shifts by the
                     93: *          calling subroutine.
                     94: *
                     95: *     ND      (output) integer
                     96: *          The number of converged eigenvalues uncovered by this
                     97: *          subroutine.
                     98: *
                     99: *     SH      (output) COMPLEX*16 array, dimension KBOT
                    100: *          On output, approximate eigenvalues that may
                    101: *          be used for shifts are stored in SH(KBOT-ND-NS+1)
                    102: *          through SR(KBOT-ND).  Converged eigenvalues are
                    103: *          stored in SH(KBOT-ND+1) through SH(KBOT).
                    104: *
                    105: *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
                    106: *          An NW-by-NW work array.
                    107: *
                    108: *     LDV     (input) integer scalar
                    109: *          The leading dimension of V just as declared in the
                    110: *          calling subroutine.  NW .LE. LDV
                    111: *
                    112: *     NH      (input) integer scalar
                    113: *          The number of columns of T.  NH.GE.NW.
                    114: *
                    115: *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
                    116: *
                    117: *     LDT     (input) integer
                    118: *          The leading dimension of T just as declared in the
                    119: *          calling subroutine.  NW .LE. LDT
                    120: *
                    121: *     NV      (input) integer
                    122: *          The number of rows of work array WV available for
                    123: *          workspace.  NV.GE.NW.
                    124: *
                    125: *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
                    126: *
                    127: *     LDWV    (input) integer
                    128: *          The leading dimension of W just as declared in the
                    129: *          calling subroutine.  NW .LE. LDV
                    130: *
                    131: *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
                    132: *          On exit, WORK(1) is set to an estimate of the optimal value
                    133: *          of LWORK for the given values of N, NW, KTOP and KBOT.
                    134: *
                    135: *     LWORK   (input) integer
                    136: *          The dimension of the work array WORK.  LWORK = 2*NW
                    137: *          suffices, but greater efficiency may result from larger
                    138: *          values of LWORK.
                    139: *
                    140: *          If LWORK = -1, then a workspace query is assumed; ZLAQR3
                    141: *          only estimates the optimal workspace size for the given
                    142: *          values of N, NW, KTOP and KBOT.  The estimate is returned
                    143: *          in WORK(1).  No error message related to LWORK is issued
                    144: *          by XERBLA.  Neither H nor Z are accessed.
                    145: *
                    146: *     ================================================================
                    147: *     Based on contributions by
                    148: *        Karen Braman and Ralph Byers, Department of Mathematics,
                    149: *        University of Kansas, USA
                    150: *
                    151: *     ================================================================
                    152: *     .. Parameters ..
                    153:       COMPLEX*16         ZERO, ONE
                    154:       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
                    155:      $                   ONE = ( 1.0d0, 0.0d0 ) )
                    156:       DOUBLE PRECISION   RZERO, RONE
                    157:       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
                    158: *     ..
                    159: *     .. Local Scalars ..
                    160:       COMPLEX*16         BETA, CDUM, S, TAU
                    161:       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
                    162:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
                    163:      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
                    164:      $                   LWKOPT, NMIN
                    165: *     ..
                    166: *     .. External Functions ..
                    167:       DOUBLE PRECISION   DLAMCH
                    168:       INTEGER            ILAENV
                    169:       EXTERNAL           DLAMCH, ILAENV
                    170: *     ..
                    171: *     .. External Subroutines ..
                    172:       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
                    173:      $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
                    174: *     ..
                    175: *     .. Intrinsic Functions ..
                    176:       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
                    177: *     ..
                    178: *     .. Statement Functions ..
                    179:       DOUBLE PRECISION   CABS1
                    180: *     ..
                    181: *     .. Statement Function definitions ..
                    182:       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
                    183: *     ..
                    184: *     .. Executable Statements ..
                    185: *
                    186: *     ==== Estimate optimal workspace. ====
                    187: *
                    188:       JW = MIN( NW, KBOT-KTOP+1 )
                    189:       IF( JW.LE.2 ) THEN
                    190:          LWKOPT = 1
                    191:       ELSE
                    192: *
                    193: *        ==== Workspace query call to ZGEHRD ====
                    194: *
                    195:          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
                    196:          LWK1 = INT( WORK( 1 ) )
                    197: *
                    198: *        ==== Workspace query call to ZUNMHR ====
                    199: *
                    200:          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
                    201:      $                WORK, -1, INFO )
                    202:          LWK2 = INT( WORK( 1 ) )
                    203: *
                    204: *        ==== Workspace query call to ZLAQR4 ====
                    205: *
                    206:          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,
                    207:      $                LDV, WORK, -1, INFQR )
                    208:          LWK3 = INT( WORK( 1 ) )
                    209: *
                    210: *        ==== Optimal workspace ====
                    211: *
                    212:          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
                    213:       END IF
                    214: *
                    215: *     ==== Quick return in case of workspace query. ====
                    216: *
                    217:       IF( LWORK.EQ.-1 ) THEN
                    218:          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
                    219:          RETURN
                    220:       END IF
                    221: *
                    222: *     ==== Nothing to do ...
                    223: *     ... for an empty active block ... ====
                    224:       NS = 0
                    225:       ND = 0
                    226:       WORK( 1 ) = ONE
                    227:       IF( KTOP.GT.KBOT )
                    228:      $   RETURN
                    229: *     ... nor for an empty deflation window. ====
                    230:       IF( NW.LT.1 )
                    231:      $   RETURN
                    232: *
                    233: *     ==== Machine constants ====
                    234: *
                    235:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
                    236:       SAFMAX = RONE / SAFMIN
                    237:       CALL DLABAD( SAFMIN, SAFMAX )
                    238:       ULP = DLAMCH( 'PRECISION' )
                    239:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
                    240: *
                    241: *     ==== Setup deflation window ====
                    242: *
                    243:       JW = MIN( NW, KBOT-KTOP+1 )
                    244:       KWTOP = KBOT - JW + 1
                    245:       IF( KWTOP.EQ.KTOP ) THEN
                    246:          S = ZERO
                    247:       ELSE
                    248:          S = H( KWTOP, KWTOP-1 )
                    249:       END IF
                    250: *
                    251:       IF( KBOT.EQ.KWTOP ) THEN
                    252: *
                    253: *        ==== 1-by-1 deflation window: not much to do ====
                    254: *
                    255:          SH( KWTOP ) = H( KWTOP, KWTOP )
                    256:          NS = 1
                    257:          ND = 0
                    258:          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
                    259:      $       KWTOP ) ) ) ) THEN
                    260:             NS = 0
                    261:             ND = 1
                    262:             IF( KWTOP.GT.KTOP )
                    263:      $         H( KWTOP, KWTOP-1 ) = ZERO
                    264:          END IF
                    265:          WORK( 1 ) = ONE
                    266:          RETURN
                    267:       END IF
                    268: *
                    269: *     ==== Convert to spike-triangular form.  (In case of a
                    270: *     .    rare QR failure, this routine continues to do
                    271: *     .    aggressive early deflation using that part of
                    272: *     .    the deflation window that converged using INFQR
                    273: *     .    here and there to keep track.) ====
                    274: *
                    275:       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
                    276:       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
                    277: *
                    278:       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
                    279:       NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )
                    280:       IF( JW.GT.NMIN ) THEN
                    281:          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
                    282:      $                JW, V, LDV, WORK, LWORK, INFQR )
                    283:       ELSE
                    284:          CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
                    285:      $                JW, V, LDV, INFQR )
                    286:       END IF
                    287: *
                    288: *     ==== Deflation detection loop ====
                    289: *
                    290:       NS = JW
                    291:       ILST = INFQR + 1
                    292:       DO 10 KNT = INFQR + 1, JW
                    293: *
                    294: *        ==== Small spike tip deflation test ====
                    295: *
                    296:          FOO = CABS1( T( NS, NS ) )
                    297:          IF( FOO.EQ.RZERO )
                    298:      $      FOO = CABS1( S )
                    299:          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
                    300:      $        THEN
                    301: *
                    302: *           ==== One more converged eigenvalue ====
                    303: *
                    304:             NS = NS - 1
                    305:          ELSE
                    306: *
                    307: *           ==== One undeflatable eigenvalue.  Move it up out of the
                    308: *           .    way.   (ZTREXC can not fail in this case.) ====
                    309: *
                    310:             IFST = NS
                    311:             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
                    312:             ILST = ILST + 1
                    313:          END IF
                    314:    10 CONTINUE
                    315: *
                    316: *        ==== Return to Hessenberg form ====
                    317: *
                    318:       IF( NS.EQ.0 )
                    319:      $   S = ZERO
                    320: *
                    321:       IF( NS.LT.JW ) THEN
                    322: *
                    323: *        ==== sorting the diagonal of T improves accuracy for
                    324: *        .    graded matrices.  ====
                    325: *
                    326:          DO 30 I = INFQR + 1, NS
                    327:             IFST = I
                    328:             DO 20 J = I + 1, NS
                    329:                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
                    330:      $            IFST = J
                    331:    20       CONTINUE
                    332:             ILST = I
                    333:             IF( IFST.NE.ILST )
                    334:      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
                    335:    30    CONTINUE
                    336:       END IF
                    337: *
                    338: *     ==== Restore shift/eigenvalue array from T ====
                    339: *
                    340:       DO 40 I = INFQR + 1, JW
                    341:          SH( KWTOP+I-1 ) = T( I, I )
                    342:    40 CONTINUE
                    343: *
                    344: *
                    345:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
                    346:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
                    347: *
                    348: *           ==== Reflect spike back into lower triangle ====
                    349: *
                    350:             CALL ZCOPY( NS, V, LDV, WORK, 1 )
                    351:             DO 50 I = 1, NS
                    352:                WORK( I ) = DCONJG( WORK( I ) )
                    353:    50       CONTINUE
                    354:             BETA = WORK( 1 )
                    355:             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
                    356:             WORK( 1 ) = ONE
                    357: *
                    358:             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
                    359: *
                    360:             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
                    361:      $                  WORK( JW+1 ) )
                    362:             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
                    363:      $                  WORK( JW+1 ) )
                    364:             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
                    365:      $                  WORK( JW+1 ) )
                    366: *
                    367:             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
                    368:      $                   LWORK-JW, INFO )
                    369:          END IF
                    370: *
                    371: *        ==== Copy updated reduced window into place ====
                    372: *
                    373:          IF( KWTOP.GT.1 )
                    374:      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
                    375:          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
                    376:          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
                    377:      $               LDH+1 )
                    378: *
                    379: *        ==== Accumulate orthogonal matrix in order update
                    380: *        .    H and Z, if requested.  ====
                    381: *
                    382:          IF( NS.GT.1 .AND. S.NE.ZERO )
                    383:      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
                    384:      $                   WORK( JW+1 ), LWORK-JW, INFO )
                    385: *
                    386: *        ==== Update vertical slab in H ====
                    387: *
                    388:          IF( WANTT ) THEN
                    389:             LTOP = 1
                    390:          ELSE
                    391:             LTOP = KTOP
                    392:          END IF
                    393:          DO 60 KROW = LTOP, KWTOP - 1, NV
                    394:             KLN = MIN( NV, KWTOP-KROW )
                    395:             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
                    396:      $                  LDH, V, LDV, ZERO, WV, LDWV )
                    397:             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
                    398:    60    CONTINUE
                    399: *
                    400: *        ==== Update horizontal slab in H ====
                    401: *
                    402:          IF( WANTT ) THEN
                    403:             DO 70 KCOL = KBOT + 1, N, NH
                    404:                KLN = MIN( NH, N-KCOL+1 )
                    405:                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
                    406:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
                    407:                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
                    408:      $                      LDH )
                    409:    70       CONTINUE
                    410:          END IF
                    411: *
                    412: *        ==== Update vertical slab in Z ====
                    413: *
                    414:          IF( WANTZ ) THEN
                    415:             DO 80 KROW = ILOZ, IHIZ, NV
                    416:                KLN = MIN( NV, IHIZ-KROW+1 )
                    417:                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
                    418:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
                    419:                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
                    420:      $                      LDZ )
                    421:    80       CONTINUE
                    422:          END IF
                    423:       END IF
                    424: *
                    425: *     ==== Return the number of deflations ... ====
                    426: *
                    427:       ND = JW - NS
                    428: *
                    429: *     ==== ... and the number of shifts. (Subtracting
                    430: *     .    INFQR from the spike length takes care
                    431: *     .    of the case of a rare QR failure while
                    432: *     .    calculating eigenvalues of the deflation
                    433: *     .    window.)  ====
                    434: *
                    435:       NS = NS - INFQR
                    436: *
                    437: *      ==== Return optimal workspace. ====
                    438: *
                    439:       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
                    440: *
                    441: *     ==== End of ZLAQR3 ====
                    442: *
                    443:       END

CVSweb interface <joel.bertrand@systella.fr>