Annotation of rpl/lapack/lapack/zlaqr2.f, revision 1.3

1.1       bertrand    1:       SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
                      2:      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
                      3:      $                   NV, WV, LDWV, WORK, LWORK )
                      4: *
                      5: *  -- LAPACK auxiliary routine (version 3.2.1)                        --
                      6: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
                      7: *  -- April 2009                                                      --
                      8: *
                      9: *     .. Scalar Arguments ..
                     10:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
                     11:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
                     12:       LOGICAL            WANTT, WANTZ
                     13: *     ..
                     14: *     .. Array Arguments ..
                     15:       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
                     16:      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
                     17: *     ..
                     18: *
                     19: *     This subroutine is identical to ZLAQR3 except that it avoids
                     20: *     recursion by calling ZLAHQR instead of ZLAQR4.
                     21: *
                     22: *
                     23: *     ******************************************************************
                     24: *     Aggressive early deflation:
                     25: *
                     26: *     This subroutine accepts as input an upper Hessenberg matrix
                     27: *     H and performs an unitary similarity transformation
                     28: *     designed to detect and deflate fully converged eigenvalues from
                     29: *     a trailing principal submatrix.  On output H has been over-
                     30: *     written by a new Hessenberg matrix that is a perturbation of
                     31: *     an unitary similarity transformation of H.  It is to be
                     32: *     hoped that the final version of H has many zero subdiagonal
                     33: *     entries.
                     34: *
                     35: *     ******************************************************************
                     36: *     WANTT   (input) LOGICAL
                     37: *          If .TRUE., then the Hessenberg matrix H is fully updated
                     38: *          so that the triangular Schur factor may be
                     39: *          computed (in cooperation with the calling subroutine).
                     40: *          If .FALSE., then only enough of H is updated to preserve
                     41: *          the eigenvalues.
                     42: *
                     43: *     WANTZ   (input) LOGICAL
                     44: *          If .TRUE., then the unitary matrix Z is updated so
                     45: *          so that the unitary Schur factor may be computed
                     46: *          (in cooperation with the calling subroutine).
                     47: *          If .FALSE., then Z is not referenced.
                     48: *
                     49: *     N       (input) INTEGER
                     50: *          The order of the matrix H and (if WANTZ is .TRUE.) the
                     51: *          order of the unitary matrix Z.
                     52: *
                     53: *     KTOP    (input) INTEGER
                     54: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
                     55: *          KBOT and KTOP together determine an isolated block
                     56: *          along the diagonal of the Hessenberg matrix.
                     57: *
                     58: *     KBOT    (input) INTEGER
                     59: *          It is assumed without a check that either
                     60: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
                     61: *          determine an isolated block along the diagonal of the
                     62: *          Hessenberg matrix.
                     63: *
                     64: *     NW      (input) INTEGER
                     65: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
                     66: *
                     67: *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
                     68: *          On input the initial N-by-N section of H stores the
                     69: *          Hessenberg matrix undergoing aggressive early deflation.
                     70: *          On output H has been transformed by a unitary
                     71: *          similarity transformation, perturbed, and the returned
                     72: *          to Hessenberg form that (it is to be hoped) has some
                     73: *          zero subdiagonal entries.
                     74: *
                     75: *     LDH     (input) integer
                     76: *          Leading dimension of H just as declared in the calling
                     77: *          subroutine.  N .LE. LDH
                     78: *
                     79: *     ILOZ    (input) INTEGER
                     80: *     IHIZ    (input) INTEGER
                     81: *          Specify the rows of Z to which transformations must be
                     82: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
                     83: *
                     84: *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
                     85: *          IF WANTZ is .TRUE., then on output, the unitary
                     86: *          similarity transformation mentioned above has been
                     87: *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
                     88: *          If WANTZ is .FALSE., then Z is unreferenced.
                     89: *
                     90: *     LDZ     (input) integer
                     91: *          The leading dimension of Z just as declared in the
                     92: *          calling subroutine.  1 .LE. LDZ.
                     93: *
                     94: *     NS      (output) integer
                     95: *          The number of unconverged (ie approximate) eigenvalues
                     96: *          returned in SR and SI that may be used as shifts by the
                     97: *          calling subroutine.
                     98: *
                     99: *     ND      (output) integer
                    100: *          The number of converged eigenvalues uncovered by this
                    101: *          subroutine.
                    102: *
                    103: *     SH      (output) COMPLEX*16 array, dimension KBOT
                    104: *          On output, approximate eigenvalues that may
                    105: *          be used for shifts are stored in SH(KBOT-ND-NS+1)
                    106: *          through SR(KBOT-ND).  Converged eigenvalues are
                    107: *          stored in SH(KBOT-ND+1) through SH(KBOT).
                    108: *
                    109: *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
                    110: *          An NW-by-NW work array.
                    111: *
                    112: *     LDV     (input) integer scalar
                    113: *          The leading dimension of V just as declared in the
                    114: *          calling subroutine.  NW .LE. LDV
                    115: *
                    116: *     NH      (input) integer scalar
                    117: *          The number of columns of T.  NH.GE.NW.
                    118: *
                    119: *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
                    120: *
                    121: *     LDT     (input) integer
                    122: *          The leading dimension of T just as declared in the
                    123: *          calling subroutine.  NW .LE. LDT
                    124: *
                    125: *     NV      (input) integer
                    126: *          The number of rows of work array WV available for
                    127: *          workspace.  NV.GE.NW.
                    128: *
                    129: *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
                    130: *
                    131: *     LDWV    (input) integer
                    132: *          The leading dimension of W just as declared in the
                    133: *          calling subroutine.  NW .LE. LDV
                    134: *
                    135: *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
                    136: *          On exit, WORK(1) is set to an estimate of the optimal value
                    137: *          of LWORK for the given values of N, NW, KTOP and KBOT.
                    138: *
                    139: *     LWORK   (input) integer
                    140: *          The dimension of the work array WORK.  LWORK = 2*NW
                    141: *          suffices, but greater efficiency may result from larger
                    142: *          values of LWORK.
                    143: *
                    144: *          If LWORK = -1, then a workspace query is assumed; ZLAQR2
                    145: *          only estimates the optimal workspace size for the given
                    146: *          values of N, NW, KTOP and KBOT.  The estimate is returned
                    147: *          in WORK(1).  No error message related to LWORK is issued
                    148: *          by XERBLA.  Neither H nor Z are accessed.
                    149: *
                    150: *     ================================================================
                    151: *     Based on contributions by
                    152: *        Karen Braman and Ralph Byers, Department of Mathematics,
                    153: *        University of Kansas, USA
                    154: *
                    155: *     ================================================================
                    156: *     .. Parameters ..
                    157:       COMPLEX*16         ZERO, ONE
                    158:       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
                    159:      $                   ONE = ( 1.0d0, 0.0d0 ) )
                    160:       DOUBLE PRECISION   RZERO, RONE
                    161:       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
                    162: *     ..
                    163: *     .. Local Scalars ..
                    164:       COMPLEX*16         BETA, CDUM, S, TAU
                    165:       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
                    166:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
                    167:      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
                    168: *     ..
                    169: *     .. External Functions ..
                    170:       DOUBLE PRECISION   DLAMCH
                    171:       EXTERNAL           DLAMCH
                    172: *     ..
                    173: *     .. External Subroutines ..
                    174:       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
                    175:      $                   ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
                    176: *     ..
                    177: *     .. Intrinsic Functions ..
                    178:       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
                    179: *     ..
                    180: *     .. Statement Functions ..
                    181:       DOUBLE PRECISION   CABS1
                    182: *     ..
                    183: *     .. Statement Function definitions ..
                    184:       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
                    185: *     ..
                    186: *     .. Executable Statements ..
                    187: *
                    188: *     ==== Estimate optimal workspace. ====
                    189: *
                    190:       JW = MIN( NW, KBOT-KTOP+1 )
                    191:       IF( JW.LE.2 ) THEN
                    192:          LWKOPT = 1
                    193:       ELSE
                    194: *
                    195: *        ==== Workspace query call to ZGEHRD ====
                    196: *
                    197:          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
                    198:          LWK1 = INT( WORK( 1 ) )
                    199: *
                    200: *        ==== Workspace query call to ZUNMHR ====
                    201: *
                    202:          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
                    203:      $                WORK, -1, INFO )
                    204:          LWK2 = INT( WORK( 1 ) )
                    205: *
                    206: *        ==== Optimal workspace ====
                    207: *
                    208:          LWKOPT = JW + MAX( LWK1, LWK2 )
                    209:       END IF
                    210: *
                    211: *     ==== Quick return in case of workspace query. ====
                    212: *
                    213:       IF( LWORK.EQ.-1 ) THEN
                    214:          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
                    215:          RETURN
                    216:       END IF
                    217: *
                    218: *     ==== Nothing to do ...
                    219: *     ... for an empty active block ... ====
                    220:       NS = 0
                    221:       ND = 0
                    222:       WORK( 1 ) = ONE
                    223:       IF( KTOP.GT.KBOT )
                    224:      $   RETURN
                    225: *     ... nor for an empty deflation window. ====
                    226:       IF( NW.LT.1 )
                    227:      $   RETURN
                    228: *
                    229: *     ==== Machine constants ====
                    230: *
                    231:       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
                    232:       SAFMAX = RONE / SAFMIN
                    233:       CALL DLABAD( SAFMIN, SAFMAX )
                    234:       ULP = DLAMCH( 'PRECISION' )
                    235:       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
                    236: *
                    237: *     ==== Setup deflation window ====
                    238: *
                    239:       JW = MIN( NW, KBOT-KTOP+1 )
                    240:       KWTOP = KBOT - JW + 1
                    241:       IF( KWTOP.EQ.KTOP ) THEN
                    242:          S = ZERO
                    243:       ELSE
                    244:          S = H( KWTOP, KWTOP-1 )
                    245:       END IF
                    246: *
                    247:       IF( KBOT.EQ.KWTOP ) THEN
                    248: *
                    249: *        ==== 1-by-1 deflation window: not much to do ====
                    250: *
                    251:          SH( KWTOP ) = H( KWTOP, KWTOP )
                    252:          NS = 1
                    253:          ND = 0
                    254:          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
                    255:      $       KWTOP ) ) ) ) THEN
                    256:             NS = 0
                    257:             ND = 1
                    258:             IF( KWTOP.GT.KTOP )
                    259:      $         H( KWTOP, KWTOP-1 ) = ZERO
                    260:          END IF
                    261:          WORK( 1 ) = ONE
                    262:          RETURN
                    263:       END IF
                    264: *
                    265: *     ==== Convert to spike-triangular form.  (In case of a
                    266: *     .    rare QR failure, this routine continues to do
                    267: *     .    aggressive early deflation using that part of
                    268: *     .    the deflation window that converged using INFQR
                    269: *     .    here and there to keep track.) ====
                    270: *
                    271:       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
                    272:       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
                    273: *
                    274:       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
                    275:       CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
                    276:      $             JW, V, LDV, INFQR )
                    277: *
                    278: *     ==== Deflation detection loop ====
                    279: *
                    280:       NS = JW
                    281:       ILST = INFQR + 1
                    282:       DO 10 KNT = INFQR + 1, JW
                    283: *
                    284: *        ==== Small spike tip deflation test ====
                    285: *
                    286:          FOO = CABS1( T( NS, NS ) )
                    287:          IF( FOO.EQ.RZERO )
                    288:      $      FOO = CABS1( S )
                    289:          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
                    290:      $        THEN
                    291: *
                    292: *           ==== One more converged eigenvalue ====
                    293: *
                    294:             NS = NS - 1
                    295:          ELSE
                    296: *
                    297: *           ==== One undeflatable eigenvalue.  Move it up out of the
                    298: *           .    way.   (ZTREXC can not fail in this case.) ====
                    299: *
                    300:             IFST = NS
                    301:             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
                    302:             ILST = ILST + 1
                    303:          END IF
                    304:    10 CONTINUE
                    305: *
                    306: *        ==== Return to Hessenberg form ====
                    307: *
                    308:       IF( NS.EQ.0 )
                    309:      $   S = ZERO
                    310: *
                    311:       IF( NS.LT.JW ) THEN
                    312: *
                    313: *        ==== sorting the diagonal of T improves accuracy for
                    314: *        .    graded matrices.  ====
                    315: *
                    316:          DO 30 I = INFQR + 1, NS
                    317:             IFST = I
                    318:             DO 20 J = I + 1, NS
                    319:                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
                    320:      $            IFST = J
                    321:    20       CONTINUE
                    322:             ILST = I
                    323:             IF( IFST.NE.ILST )
                    324:      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
                    325:    30    CONTINUE
                    326:       END IF
                    327: *
                    328: *     ==== Restore shift/eigenvalue array from T ====
                    329: *
                    330:       DO 40 I = INFQR + 1, JW
                    331:          SH( KWTOP+I-1 ) = T( I, I )
                    332:    40 CONTINUE
                    333: *
                    334: *
                    335:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
                    336:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
                    337: *
                    338: *           ==== Reflect spike back into lower triangle ====
                    339: *
                    340:             CALL ZCOPY( NS, V, LDV, WORK, 1 )
                    341:             DO 50 I = 1, NS
                    342:                WORK( I ) = DCONJG( WORK( I ) )
                    343:    50       CONTINUE
                    344:             BETA = WORK( 1 )
                    345:             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
                    346:             WORK( 1 ) = ONE
                    347: *
                    348:             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
                    349: *
                    350:             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
                    351:      $                  WORK( JW+1 ) )
                    352:             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
                    353:      $                  WORK( JW+1 ) )
                    354:             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
                    355:      $                  WORK( JW+1 ) )
                    356: *
                    357:             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
                    358:      $                   LWORK-JW, INFO )
                    359:          END IF
                    360: *
                    361: *        ==== Copy updated reduced window into place ====
                    362: *
                    363:          IF( KWTOP.GT.1 )
                    364:      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
                    365:          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
                    366:          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
                    367:      $               LDH+1 )
                    368: *
                    369: *        ==== Accumulate orthogonal matrix in order update
                    370: *        .    H and Z, if requested.  ====
                    371: *
                    372:          IF( NS.GT.1 .AND. S.NE.ZERO )
                    373:      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
                    374:      $                   WORK( JW+1 ), LWORK-JW, INFO )
                    375: *
                    376: *        ==== Update vertical slab in H ====
                    377: *
                    378:          IF( WANTT ) THEN
                    379:             LTOP = 1
                    380:          ELSE
                    381:             LTOP = KTOP
                    382:          END IF
                    383:          DO 60 KROW = LTOP, KWTOP - 1, NV
                    384:             KLN = MIN( NV, KWTOP-KROW )
                    385:             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
                    386:      $                  LDH, V, LDV, ZERO, WV, LDWV )
                    387:             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
                    388:    60    CONTINUE
                    389: *
                    390: *        ==== Update horizontal slab in H ====
                    391: *
                    392:          IF( WANTT ) THEN
                    393:             DO 70 KCOL = KBOT + 1, N, NH
                    394:                KLN = MIN( NH, N-KCOL+1 )
                    395:                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
                    396:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
                    397:                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
                    398:      $                      LDH )
                    399:    70       CONTINUE
                    400:          END IF
                    401: *
                    402: *        ==== Update vertical slab in Z ====
                    403: *
                    404:          IF( WANTZ ) THEN
                    405:             DO 80 KROW = ILOZ, IHIZ, NV
                    406:                KLN = MIN( NV, IHIZ-KROW+1 )
                    407:                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
                    408:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
                    409:                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
                    410:      $                      LDZ )
                    411:    80       CONTINUE
                    412:          END IF
                    413:       END IF
                    414: *
                    415: *     ==== Return the number of deflations ... ====
                    416: *
                    417:       ND = JW - NS
                    418: *
                    419: *     ==== ... and the number of shifts. (Subtracting
                    420: *     .    INFQR from the spike length takes care
                    421: *     .    of the case of a rare QR failure while
                    422: *     .    calculating eigenvalues of the deflation
                    423: *     .    window.)  ====
                    424: *
                    425:       NS = NS - INFQR
                    426: *
                    427: *      ==== Return optimal workspace. ====
                    428: *
                    429:       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
                    430: *
                    431: *     ==== End of ZLAQR2 ====
                    432: *
                    433:       END

CVSweb interface <joel.bertrand@systella.fr>