Annotation of rpl/lapack/lapack/dlaqr5.f, revision 1.10
1.10 ! bertrand 1: *> \brief \b DLAQR5
! 2: *
! 3: * =========== DOCUMENTATION ===========
! 4: *
! 5: * Online html documentation available at
! 6: * http://www.netlib.org/lapack/explore-html/
! 7: *
! 8: *> \htmlonly
! 9: *> Download DLAQR5 + dependencies
! 10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
! 11: *> [TGZ]</a>
! 12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
! 13: *> [ZIP]</a>
! 14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
! 15: *> [TXT]</a>
! 16: *> \endhtmlonly
! 17: *
! 18: * Definition:
! 19: * ===========
! 20: *
! 21: * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
! 22: * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
! 23: * LDU, NV, WV, LDWV, NH, WH, LDWH )
! 24: *
! 25: * .. Scalar Arguments ..
! 26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
! 27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
! 28: * LOGICAL WANTT, WANTZ
! 29: * ..
! 30: * .. Array Arguments ..
! 31: * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
! 32: * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
! 33: * $ Z( LDZ, * )
! 34: * ..
! 35: *
! 36: *
! 37: *> \par Purpose:
! 38: * =============
! 39: *>
! 40: *> \verbatim
! 41: *>
! 42: *> DLAQR5, called by DLAQR0, performs a
! 43: *> single small-bulge multi-shift QR sweep.
! 44: *> \endverbatim
! 45: *
! 46: * Arguments:
! 47: * ==========
! 48: *
! 49: *> \param[in] WANTT
! 50: *> \verbatim
! 51: *> WANTT is logical scalar
! 52: *> WANTT = .true. if the quasi-triangular Schur factor
! 53: *> is being computed. WANTT is set to .false. otherwise.
! 54: *> \endverbatim
! 55: *>
! 56: *> \param[in] WANTZ
! 57: *> \verbatim
! 58: *> WANTZ is logical scalar
! 59: *> WANTZ = .true. if the orthogonal Schur factor is being
! 60: *> computed. WANTZ is set to .false. otherwise.
! 61: *> \endverbatim
! 62: *>
! 63: *> \param[in] KACC22
! 64: *> \verbatim
! 65: *> KACC22 is integer with value 0, 1, or 2.
! 66: *> Specifies the computation mode of far-from-diagonal
! 67: *> orthogonal updates.
! 68: *> = 0: DLAQR5 does not accumulate reflections and does not
! 69: *> use matrix-matrix multiply to update far-from-diagonal
! 70: *> matrix entries.
! 71: *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
! 72: *> multiply to update the far-from-diagonal matrix entries.
! 73: *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix
! 74: *> multiply to update the far-from-diagonal matrix entries,
! 75: *> and takes advantage of 2-by-2 block structure during
! 76: *> matrix multiplies.
! 77: *> \endverbatim
! 78: *>
! 79: *> \param[in] N
! 80: *> \verbatim
! 81: *> N is integer scalar
! 82: *> N is the order of the Hessenberg matrix H upon which this
! 83: *> subroutine operates.
! 84: *> \endverbatim
! 85: *>
! 86: *> \param[in] KTOP
! 87: *> \verbatim
! 88: *> KTOP is integer scalar
! 89: *> \endverbatim
! 90: *>
! 91: *> \param[in] KBOT
! 92: *> \verbatim
! 93: *> KBOT is integer scalar
! 94: *> These are the first and last rows and columns of an
! 95: *> isolated diagonal block upon which the QR sweep is to be
! 96: *> applied. It is assumed without a check that
! 97: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
! 98: *> and
! 99: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
! 100: *> \endverbatim
! 101: *>
! 102: *> \param[in] NSHFTS
! 103: *> \verbatim
! 104: *> NSHFTS is integer scalar
! 105: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
! 106: *> must be positive and even.
! 107: *> \endverbatim
! 108: *>
! 109: *> \param[in,out] SR
! 110: *> \verbatim
! 111: *> SR is DOUBLE PRECISION array of size (NSHFTS)
! 112: *> \endverbatim
! 113: *>
! 114: *> \param[in,out] SI
! 115: *> \verbatim
! 116: *> SI is DOUBLE PRECISION array of size (NSHFTS)
! 117: *> SR contains the real parts and SI contains the imaginary
! 118: *> parts of the NSHFTS shifts of origin that define the
! 119: *> multi-shift QR sweep. On output SR and SI may be
! 120: *> reordered.
! 121: *> \endverbatim
! 122: *>
! 123: *> \param[in,out] H
! 124: *> \verbatim
! 125: *> H is DOUBLE PRECISION array of size (LDH,N)
! 126: *> On input H contains a Hessenberg matrix. On output a
! 127: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
! 128: *> to the isolated diagonal block in rows and columns KTOP
! 129: *> through KBOT.
! 130: *> \endverbatim
! 131: *>
! 132: *> \param[in] LDH
! 133: *> \verbatim
! 134: *> LDH is integer scalar
! 135: *> LDH is the leading dimension of H just as declared in the
! 136: *> calling procedure. LDH.GE.MAX(1,N).
! 137: *> \endverbatim
! 138: *>
! 139: *> \param[in] ILOZ
! 140: *> \verbatim
! 141: *> ILOZ is INTEGER
! 142: *> \endverbatim
! 143: *>
! 144: *> \param[in] IHIZ
! 145: *> \verbatim
! 146: *> IHIZ is INTEGER
! 147: *> Specify the rows of Z to which transformations must be
! 148: *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
! 149: *> \endverbatim
! 150: *>
! 151: *> \param[in,out] Z
! 152: *> \verbatim
! 153: *> Z is DOUBLE PRECISION array of size (LDZ,IHI)
! 154: *> If WANTZ = .TRUE., then the QR Sweep orthogonal
! 155: *> similarity transformation is accumulated into
! 156: *> Z(ILOZ:IHIZ,ILO:IHI) from the right.
! 157: *> If WANTZ = .FALSE., then Z is unreferenced.
! 158: *> \endverbatim
! 159: *>
! 160: *> \param[in] LDZ
! 161: *> \verbatim
! 162: *> LDZ is integer scalar
! 163: *> LDA is the leading dimension of Z just as declared in
! 164: *> the calling procedure. LDZ.GE.N.
! 165: *> \endverbatim
! 166: *>
! 167: *> \param[out] V
! 168: *> \verbatim
! 169: *> V is DOUBLE PRECISION array of size (LDV,NSHFTS/2)
! 170: *> \endverbatim
! 171: *>
! 172: *> \param[in] LDV
! 173: *> \verbatim
! 174: *> LDV is integer scalar
! 175: *> LDV is the leading dimension of V as declared in the
! 176: *> calling procedure. LDV.GE.3.
! 177: *> \endverbatim
! 178: *>
! 179: *> \param[out] U
! 180: *> \verbatim
! 181: *> U is DOUBLE PRECISION array of size
! 182: *> (LDU,3*NSHFTS-3)
! 183: *> \endverbatim
! 184: *>
! 185: *> \param[in] LDU
! 186: *> \verbatim
! 187: *> LDU is integer scalar
! 188: *> LDU is the leading dimension of U just as declared in the
! 189: *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
! 190: *> \endverbatim
! 191: *>
! 192: *> \param[in] NH
! 193: *> \verbatim
! 194: *> NH is integer scalar
! 195: *> NH is the number of columns in array WH available for
! 196: *> workspace. NH.GE.1.
! 197: *> \endverbatim
! 198: *>
! 199: *> \param[out] WH
! 200: *> \verbatim
! 201: *> WH is DOUBLE PRECISION array of size (LDWH,NH)
! 202: *> \endverbatim
! 203: *>
! 204: *> \param[in] LDWH
! 205: *> \verbatim
! 206: *> LDWH is integer scalar
! 207: *> Leading dimension of WH just as declared in the
! 208: *> calling procedure. LDWH.GE.3*NSHFTS-3.
! 209: *> \endverbatim
! 210: *>
! 211: *> \param[in] NV
! 212: *> \verbatim
! 213: *> NV is integer scalar
! 214: *> NV is the number of rows in WV agailable for workspace.
! 215: *> NV.GE.1.
! 216: *> \endverbatim
! 217: *>
! 218: *> \param[out] WV
! 219: *> \verbatim
! 220: *> WV is DOUBLE PRECISION array of size
! 221: *> (LDWV,3*NSHFTS-3)
! 222: *> \endverbatim
! 223: *>
! 224: *> \param[in] LDWV
! 225: *> \verbatim
! 226: *> LDWV is integer scalar
! 227: *> LDWV is the leading dimension of WV as declared in the
! 228: *> in the calling subroutine. LDWV.GE.NV.
! 229: *> \endverbatim
! 230: *
! 231: * Authors:
! 232: * ========
! 233: *
! 234: *> \author Univ. of Tennessee
! 235: *> \author Univ. of California Berkeley
! 236: *> \author Univ. of Colorado Denver
! 237: *> \author NAG Ltd.
! 238: *
! 239: *> \date November 2011
! 240: *
! 241: *> \ingroup doubleOTHERauxiliary
! 242: *
! 243: *> \par Contributors:
! 244: * ==================
! 245: *>
! 246: *> Karen Braman and Ralph Byers, Department of Mathematics,
! 247: *> University of Kansas, USA
! 248: *
! 249: *> \par References:
! 250: * ================
! 251: *>
! 252: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
! 253: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
! 254: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
! 255: *> 929--947, 2002.
! 256: *>
! 257: * =====================================================================
1.1 bertrand 258: SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
259: $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
260: $ LDU, NV, WV, LDWV, NH, WH, LDWH )
261: *
1.10 ! bertrand 262: * -- LAPACK auxiliary routine (version 3.4.0) --
1.1 bertrand 263: * -- LAPACK is a software package provided by Univ. of Tennessee, --
264: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.10 ! bertrand 265: * November 2011
1.1 bertrand 266: *
267: * .. Scalar Arguments ..
268: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
269: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
270: LOGICAL WANTT, WANTZ
271: * ..
272: * .. Array Arguments ..
273: DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
274: $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
275: $ Z( LDZ, * )
276: * ..
277: *
1.10 ! bertrand 278: * ================================================================
1.1 bertrand 279: * .. Parameters ..
280: DOUBLE PRECISION ZERO, ONE
281: PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
282: * ..
283: * .. Local Scalars ..
284: DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
285: $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
286: $ ULP
287: INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
288: $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
289: $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
290: $ NS, NU
291: LOGICAL ACCUM, BLK22, BMP22
292: * ..
293: * .. External Functions ..
294: DOUBLE PRECISION DLAMCH
295: EXTERNAL DLAMCH
296: * ..
297: * .. Intrinsic Functions ..
298: *
299: INTRINSIC ABS, DBLE, MAX, MIN, MOD
300: * ..
301: * .. Local Arrays ..
302: DOUBLE PRECISION VT( 3 )
303: * ..
304: * .. External Subroutines ..
305: EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
306: $ DTRMM
307: * ..
308: * .. Executable Statements ..
309: *
310: * ==== If there are no shifts, then there is nothing to do. ====
311: *
312: IF( NSHFTS.LT.2 )
313: $ RETURN
314: *
315: * ==== If the active block is empty or 1-by-1, then there
316: * . is nothing to do. ====
317: *
318: IF( KTOP.GE.KBOT )
319: $ RETURN
320: *
321: * ==== Shuffle shifts into pairs of real shifts and pairs
322: * . of complex conjugate shifts assuming complex
323: * . conjugate shifts are already adjacent to one
324: * . another. ====
325: *
326: DO 10 I = 1, NSHFTS - 2, 2
327: IF( SI( I ).NE.-SI( I+1 ) ) THEN
328: *
329: SWAP = SR( I )
330: SR( I ) = SR( I+1 )
331: SR( I+1 ) = SR( I+2 )
332: SR( I+2 ) = SWAP
333: *
334: SWAP = SI( I )
335: SI( I ) = SI( I+1 )
336: SI( I+1 ) = SI( I+2 )
337: SI( I+2 ) = SWAP
338: END IF
339: 10 CONTINUE
340: *
341: * ==== NSHFTS is supposed to be even, but if it is odd,
342: * . then simply reduce it by one. The shuffle above
343: * . ensures that the dropped shift is real and that
344: * . the remaining shifts are paired. ====
345: *
346: NS = NSHFTS - MOD( NSHFTS, 2 )
347: *
348: * ==== Machine constants for deflation ====
349: *
350: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
351: SAFMAX = ONE / SAFMIN
352: CALL DLABAD( SAFMIN, SAFMAX )
353: ULP = DLAMCH( 'PRECISION' )
354: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
355: *
356: * ==== Use accumulated reflections to update far-from-diagonal
357: * . entries ? ====
358: *
359: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
360: *
361: * ==== If so, exploit the 2-by-2 block structure? ====
362: *
363: BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
364: *
365: * ==== clear trash ====
366: *
367: IF( KTOP+2.LE.KBOT )
368: $ H( KTOP+2, KTOP ) = ZERO
369: *
370: * ==== NBMPS = number of 2-shift bulges in the chain ====
371: *
372: NBMPS = NS / 2
373: *
374: * ==== KDU = width of slab ====
375: *
376: KDU = 6*NBMPS - 3
377: *
378: * ==== Create and chase chains of NBMPS bulges ====
379: *
380: DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
381: NDCOL = INCOL + KDU
382: IF( ACCUM )
383: $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
384: *
385: * ==== Near-the-diagonal bulge chase. The following loop
386: * . performs the near-the-diagonal part of a small bulge
387: * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
388: * . chunk extends from column INCOL to column NDCOL
389: * . (including both column INCOL and column NDCOL). The
390: * . following loop chases a 3*NBMPS column long chain of
391: * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
392: * . may be less than KTOP and and NDCOL may be greater than
393: * . KBOT indicating phantom columns from which to chase
394: * . bulges before they are actually introduced or to which
395: * . to chase bulges beyond column KBOT.) ====
396: *
397: DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
398: *
399: * ==== Bulges number MTOP to MBOT are active double implicit
400: * . shift bulges. There may or may not also be small
401: * . 2-by-2 bulge, if there is room. The inactive bulges
402: * . (if any) must wait until the active bulges have moved
403: * . down the diagonal to make room. The phantom matrix
404: * . paradigm described above helps keep track. ====
405: *
406: MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
407: MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
408: M22 = MBOT + 1
409: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
410: $ ( KBOT-2 )
411: *
412: * ==== Generate reflections to chase the chain right
413: * . one column. (The minimum value of K is KTOP-1.) ====
414: *
415: DO 20 M = MTOP, MBOT
416: K = KRCOL + 3*( M-1 )
417: IF( K.EQ.KTOP-1 ) THEN
418: CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
419: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
420: $ V( 1, M ) )
421: ALPHA = V( 1, M )
422: CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
423: ELSE
424: BETA = H( K+1, K )
425: V( 2, M ) = H( K+2, K )
426: V( 3, M ) = H( K+3, K )
427: CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
428: *
429: * ==== A Bulge may collapse because of vigilant
430: * . deflation or destructive underflow. In the
431: * . underflow case, try the two-small-subdiagonals
432: * . trick to try to reinflate the bulge. ====
433: *
434: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
435: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
436: *
437: * ==== Typical case: not collapsed (yet). ====
438: *
439: H( K+1, K ) = BETA
440: H( K+2, K ) = ZERO
441: H( K+3, K ) = ZERO
442: ELSE
443: *
444: * ==== Atypical case: collapsed. Attempt to
445: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
446: * . If the fill resulting from the new
447: * . reflector is too large, then abandon it.
448: * . Otherwise, use the new one. ====
449: *
450: CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
451: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
452: $ VT )
453: ALPHA = VT( 1 )
454: CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
455: REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
456: $ H( K+2, K ) )
457: *
458: IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
459: $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
460: $ ( ABS( H( K, K ) )+ABS( H( K+1,
461: $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
462: *
463: * ==== Starting a new bulge here would
464: * . create non-negligible fill. Use
465: * . the old one with trepidation. ====
466: *
467: H( K+1, K ) = BETA
468: H( K+2, K ) = ZERO
469: H( K+3, K ) = ZERO
470: ELSE
471: *
472: * ==== Stating a new bulge here would
473: * . create only negligible fill.
474: * . Replace the old reflector with
475: * . the new one. ====
476: *
477: H( K+1, K ) = H( K+1, K ) - REFSUM
478: H( K+2, K ) = ZERO
479: H( K+3, K ) = ZERO
480: V( 1, M ) = VT( 1 )
481: V( 2, M ) = VT( 2 )
482: V( 3, M ) = VT( 3 )
483: END IF
484: END IF
485: END IF
486: 20 CONTINUE
487: *
488: * ==== Generate a 2-by-2 reflection, if needed. ====
489: *
490: K = KRCOL + 3*( M22-1 )
491: IF( BMP22 ) THEN
492: IF( K.EQ.KTOP-1 ) THEN
493: CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
494: $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
495: $ V( 1, M22 ) )
496: BETA = V( 1, M22 )
497: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
498: ELSE
499: BETA = H( K+1, K )
500: V( 2, M22 ) = H( K+2, K )
501: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
502: H( K+1, K ) = BETA
503: H( K+2, K ) = ZERO
504: END IF
505: END IF
506: *
507: * ==== Multiply H by reflections from the left ====
508: *
509: IF( ACCUM ) THEN
510: JBOT = MIN( NDCOL, KBOT )
511: ELSE IF( WANTT ) THEN
512: JBOT = N
513: ELSE
514: JBOT = KBOT
515: END IF
516: DO 40 J = MAX( KTOP, KRCOL ), JBOT
517: MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
518: DO 30 M = MTOP, MEND
519: K = KRCOL + 3*( M-1 )
520: REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
521: $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
522: H( K+1, J ) = H( K+1, J ) - REFSUM
523: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
524: H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
525: 30 CONTINUE
526: 40 CONTINUE
527: IF( BMP22 ) THEN
528: K = KRCOL + 3*( M22-1 )
529: DO 50 J = MAX( K+1, KTOP ), JBOT
530: REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
531: $ H( K+2, J ) )
532: H( K+1, J ) = H( K+1, J ) - REFSUM
533: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
534: 50 CONTINUE
535: END IF
536: *
537: * ==== Multiply H by reflections from the right.
538: * . Delay filling in the last row until the
539: * . vigilant deflation check is complete. ====
540: *
541: IF( ACCUM ) THEN
542: JTOP = MAX( KTOP, INCOL )
543: ELSE IF( WANTT ) THEN
544: JTOP = 1
545: ELSE
546: JTOP = KTOP
547: END IF
548: DO 90 M = MTOP, MBOT
549: IF( V( 1, M ).NE.ZERO ) THEN
550: K = KRCOL + 3*( M-1 )
551: DO 60 J = JTOP, MIN( KBOT, K+3 )
552: REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
553: $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
554: H( J, K+1 ) = H( J, K+1 ) - REFSUM
555: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
556: H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
557: 60 CONTINUE
558: *
559: IF( ACCUM ) THEN
560: *
561: * ==== Accumulate U. (If necessary, update Z later
562: * . with with an efficient matrix-matrix
563: * . multiply.) ====
564: *
565: KMS = K - INCOL
566: DO 70 J = MAX( 1, KTOP-INCOL ), KDU
567: REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
568: $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
569: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
570: U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
571: U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
572: 70 CONTINUE
573: ELSE IF( WANTZ ) THEN
574: *
575: * ==== U is not accumulated, so update Z
576: * . now by multiplying by reflections
577: * . from the right. ====
578: *
579: DO 80 J = ILOZ, IHIZ
580: REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
581: $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
582: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
583: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
584: Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
585: 80 CONTINUE
586: END IF
587: END IF
588: 90 CONTINUE
589: *
590: * ==== Special case: 2-by-2 reflection (if needed) ====
591: *
592: K = KRCOL + 3*( M22-1 )
1.7 bertrand 593: IF( BMP22 ) THEN
594: IF ( V( 1, M22 ).NE.ZERO ) THEN
595: DO 100 J = JTOP, MIN( KBOT, K+3 )
596: REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
597: $ H( J, K+2 ) )
598: H( J, K+1 ) = H( J, K+1 ) - REFSUM
599: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
600: 100 CONTINUE
601: *
602: IF( ACCUM ) THEN
603: KMS = K - INCOL
604: DO 110 J = MAX( 1, KTOP-INCOL ), KDU
605: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
606: $ V( 2, M22 )*U( J, KMS+2 ) )
607: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
608: U( J, KMS+2 ) = U( J, KMS+2 ) -
609: $ REFSUM*V( 2, M22 )
1.1 bertrand 610: 110 CONTINUE
1.7 bertrand 611: ELSE IF( WANTZ ) THEN
612: DO 120 J = ILOZ, IHIZ
613: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
614: $ Z( J, K+2 ) )
615: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
616: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
617: 120 CONTINUE
618: END IF
1.1 bertrand 619: END IF
620: END IF
621: *
622: * ==== Vigilant deflation check ====
623: *
624: MSTART = MTOP
625: IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
626: $ MSTART = MSTART + 1
627: MEND = MBOT
628: IF( BMP22 )
629: $ MEND = MEND + 1
630: IF( KRCOL.EQ.KBOT-2 )
631: $ MEND = MEND + 1
632: DO 130 M = MSTART, MEND
633: K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
634: *
635: * ==== The following convergence test requires that
636: * . the tradition small-compared-to-nearby-diagonals
637: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
638: * . criteria both be satisfied. The latter improves
639: * . accuracy in some examples. Falling back on an
640: * . alternate convergence criterion when TST1 or TST2
641: * . is zero (as done here) is traditional but probably
642: * . unnecessary. ====
643: *
644: IF( H( K+1, K ).NE.ZERO ) THEN
645: TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
646: IF( TST1.EQ.ZERO ) THEN
647: IF( K.GE.KTOP+1 )
648: $ TST1 = TST1 + ABS( H( K, K-1 ) )
649: IF( K.GE.KTOP+2 )
650: $ TST1 = TST1 + ABS( H( K, K-2 ) )
651: IF( K.GE.KTOP+3 )
652: $ TST1 = TST1 + ABS( H( K, K-3 ) )
653: IF( K.LE.KBOT-2 )
654: $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
655: IF( K.LE.KBOT-3 )
656: $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
657: IF( K.LE.KBOT-4 )
658: $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
659: END IF
660: IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
661: $ THEN
662: H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
663: H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
664: H11 = MAX( ABS( H( K+1, K+1 ) ),
665: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
666: H22 = MIN( ABS( H( K+1, K+1 ) ),
667: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
668: SCL = H11 + H12
669: TST2 = H22*( H11 / SCL )
670: *
671: IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
672: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
673: END IF
674: END IF
675: 130 CONTINUE
676: *
677: * ==== Fill in the last row of each bulge. ====
678: *
679: MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
680: DO 140 M = MTOP, MEND
681: K = KRCOL + 3*( M-1 )
682: REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
683: H( K+4, K+1 ) = -REFSUM
684: H( K+4, K+2 ) = -REFSUM*V( 2, M )
685: H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
686: 140 CONTINUE
687: *
688: * ==== End of near-the-diagonal bulge chase. ====
689: *
690: 150 CONTINUE
691: *
692: * ==== Use U (if accumulated) to update far-from-diagonal
693: * . entries in H. If required, use U to update Z as
694: * . well. ====
695: *
696: IF( ACCUM ) THEN
697: IF( WANTT ) THEN
698: JTOP = 1
699: JBOT = N
700: ELSE
701: JTOP = KTOP
702: JBOT = KBOT
703: END IF
704: IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
705: $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
706: *
707: * ==== Updates not exploiting the 2-by-2 block
708: * . structure of U. K1 and NU keep track of
709: * . the location and size of U in the special
710: * . cases of introducing bulges and chasing
711: * . bulges off the bottom. In these special
712: * . cases and in case the number of shifts
713: * . is NS = 2, there is no 2-by-2 block
714: * . structure to exploit. ====
715: *
716: K1 = MAX( 1, KTOP-INCOL )
717: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
718: *
719: * ==== Horizontal Multiply ====
720: *
721: DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
722: JLEN = MIN( NH, JBOT-JCOL+1 )
723: CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
724: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
725: $ LDWH )
726: CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
727: $ H( INCOL+K1, JCOL ), LDH )
728: 160 CONTINUE
729: *
730: * ==== Vertical multiply ====
731: *
732: DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
733: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
734: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
735: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
736: $ LDU, ZERO, WV, LDWV )
737: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
738: $ H( JROW, INCOL+K1 ), LDH )
739: 170 CONTINUE
740: *
741: * ==== Z multiply (also vertical) ====
742: *
743: IF( WANTZ ) THEN
744: DO 180 JROW = ILOZ, IHIZ, NV
745: JLEN = MIN( NV, IHIZ-JROW+1 )
746: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
747: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
748: $ LDU, ZERO, WV, LDWV )
749: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
750: $ Z( JROW, INCOL+K1 ), LDZ )
751: 180 CONTINUE
752: END IF
753: ELSE
754: *
755: * ==== Updates exploiting U's 2-by-2 block structure.
756: * . (I2, I4, J2, J4 are the last rows and columns
757: * . of the blocks.) ====
758: *
759: I2 = ( KDU+1 ) / 2
760: I4 = KDU
761: J2 = I4 - I2
762: J4 = KDU
763: *
764: * ==== KZS and KNZ deal with the band of zeros
765: * . along the diagonal of one of the triangular
766: * . blocks. ====
767: *
768: KZS = ( J4-J2 ) - ( NS+1 )
769: KNZ = NS + 1
770: *
771: * ==== Horizontal multiply ====
772: *
773: DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
774: JLEN = MIN( NH, JBOT-JCOL+1 )
775: *
776: * ==== Copy bottom of H to top+KZS of scratch ====
777: * (The first KZS rows get multiplied by zero.) ====
778: *
779: CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
780: $ LDH, WH( KZS+1, 1 ), LDWH )
781: *
1.9 bertrand 782: * ==== Multiply by U21**T ====
1.1 bertrand 783: *
784: CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
785: CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
786: $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
787: $ LDWH )
788: *
1.9 bertrand 789: * ==== Multiply top of H by U11**T ====
1.1 bertrand 790: *
791: CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
792: $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
793: *
794: * ==== Copy top of H to bottom of WH ====
795: *
796: CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
797: $ WH( I2+1, 1 ), LDWH )
798: *
1.9 bertrand 799: * ==== Multiply by U21**T ====
1.1 bertrand 800: *
801: CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
802: $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
803: *
804: * ==== Multiply by U22 ====
805: *
806: CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
807: $ U( J2+1, I2+1 ), LDU,
808: $ H( INCOL+1+J2, JCOL ), LDH, ONE,
809: $ WH( I2+1, 1 ), LDWH )
810: *
811: * ==== Copy it back ====
812: *
813: CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
814: $ H( INCOL+1, JCOL ), LDH )
815: 190 CONTINUE
816: *
817: * ==== Vertical multiply ====
818: *
819: DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
820: JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
821: *
822: * ==== Copy right of H to scratch (the first KZS
823: * . columns get multiplied by zero) ====
824: *
825: CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
826: $ LDH, WV( 1, 1+KZS ), LDWV )
827: *
828: * ==== Multiply by U21 ====
829: *
830: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
831: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
832: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
833: $ LDWV )
834: *
835: * ==== Multiply by U11 ====
836: *
837: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
838: $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
839: $ LDWV )
840: *
841: * ==== Copy left of H to right of scratch ====
842: *
843: CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
844: $ WV( 1, 1+I2 ), LDWV )
845: *
846: * ==== Multiply by U21 ====
847: *
848: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
849: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
850: *
851: * ==== Multiply by U22 ====
852: *
853: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
854: $ H( JROW, INCOL+1+J2 ), LDH,
855: $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
856: $ LDWV )
857: *
858: * ==== Copy it back ====
859: *
860: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
861: $ H( JROW, INCOL+1 ), LDH )
862: 200 CONTINUE
863: *
864: * ==== Multiply Z (also vertical) ====
865: *
866: IF( WANTZ ) THEN
867: DO 210 JROW = ILOZ, IHIZ, NV
868: JLEN = MIN( NV, IHIZ-JROW+1 )
869: *
870: * ==== Copy right of Z to left of scratch (first
871: * . KZS columns get multiplied by zero) ====
872: *
873: CALL DLACPY( 'ALL', JLEN, KNZ,
874: $ Z( JROW, INCOL+1+J2 ), LDZ,
875: $ WV( 1, 1+KZS ), LDWV )
876: *
877: * ==== Multiply by U12 ====
878: *
879: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
880: $ LDWV )
881: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
882: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
883: $ LDWV )
884: *
885: * ==== Multiply by U11 ====
886: *
887: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
888: $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
889: $ WV, LDWV )
890: *
891: * ==== Copy left of Z to right of scratch ====
892: *
893: CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
894: $ LDZ, WV( 1, 1+I2 ), LDWV )
895: *
896: * ==== Multiply by U21 ====
897: *
898: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
899: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
900: $ LDWV )
901: *
902: * ==== Multiply by U22 ====
903: *
904: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
905: $ Z( JROW, INCOL+1+J2 ), LDZ,
906: $ U( J2+1, I2+1 ), LDU, ONE,
907: $ WV( 1, 1+I2 ), LDWV )
908: *
909: * ==== Copy the result back to Z ====
910: *
911: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
912: $ Z( JROW, INCOL+1 ), LDZ )
913: 210 CONTINUE
914: END IF
915: END IF
916: END IF
917: 220 CONTINUE
918: *
919: * ==== End of DLAQR5 ====
920: *
921: END
CVSweb interface <joel.bertrand@systella.fr>