Annotation of rpl/lapack/lapack/zlaqr5.f, revision 1.21
1.13 bertrand 1: *> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
1.10 bertrand 2: *
3: * =========== DOCUMENTATION ===========
4: *
1.18 bertrand 5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
1.10 bertrand 7: *
8: *> \htmlonly
1.18 bertrand 9: *> Download ZLAQR5 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
1.10 bertrand 15: *> [TXT]</a>
1.18 bertrand 16: *> \endhtmlonly
1.10 bertrand 17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22: * H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23: * WV, LDWV, NH, WH, LDWH )
1.18 bertrand 24: *
1.10 bertrand 25: * .. Scalar Arguments ..
26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28: * LOGICAL WANTT, WANTZ
29: * ..
30: * .. Array Arguments ..
31: * COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32: * $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33: * ..
1.18 bertrand 34: *
1.10 bertrand 35: *
36: *> \par Purpose:
37: * =============
38: *>
39: *> \verbatim
40: *>
41: *> ZLAQR5, called by ZLAQR0, performs a
42: *> single small-bulge multi-shift QR sweep.
43: *> \endverbatim
44: *
45: * Arguments:
46: * ==========
47: *
48: *> \param[in] WANTT
49: *> \verbatim
1.20 bertrand 50: *> WANTT is LOGICAL
1.10 bertrand 51: *> WANTT = .true. if the triangular Schur factor
52: *> is being computed. WANTT is set to .false. otherwise.
53: *> \endverbatim
54: *>
55: *> \param[in] WANTZ
56: *> \verbatim
1.20 bertrand 57: *> WANTZ is LOGICAL
1.10 bertrand 58: *> WANTZ = .true. if the unitary Schur factor is being
59: *> computed. WANTZ is set to .false. otherwise.
60: *> \endverbatim
61: *>
62: *> \param[in] KACC22
63: *> \verbatim
1.20 bertrand 64: *> KACC22 is INTEGER with value 0, 1, or 2.
1.10 bertrand 65: *> Specifies the computation mode of far-from-diagonal
66: *> orthogonal updates.
67: *> = 0: ZLAQR5 does not accumulate reflections and does not
68: *> use matrix-matrix multiply to update far-from-diagonal
69: *> matrix entries.
70: *> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71: *> multiply to update the far-from-diagonal matrix entries.
72: *> = 2: ZLAQR5 accumulates reflections, uses matrix-matrix
73: *> multiply to update the far-from-diagonal matrix entries,
74: *> and takes advantage of 2-by-2 block structure during
75: *> matrix multiplies.
76: *> \endverbatim
77: *>
78: *> \param[in] N
79: *> \verbatim
1.20 bertrand 80: *> N is INTEGER
1.10 bertrand 81: *> N is the order of the Hessenberg matrix H upon which this
82: *> subroutine operates.
83: *> \endverbatim
84: *>
85: *> \param[in] KTOP
86: *> \verbatim
1.20 bertrand 87: *> KTOP is INTEGER
1.10 bertrand 88: *> \endverbatim
89: *>
90: *> \param[in] KBOT
91: *> \verbatim
1.20 bertrand 92: *> KBOT is INTEGER
1.10 bertrand 93: *> These are the first and last rows and columns of an
94: *> isolated diagonal block upon which the QR sweep is to be
95: *> applied. It is assumed without a check that
96: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97: *> and
98: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
99: *> \endverbatim
100: *>
101: *> \param[in] NSHFTS
102: *> \verbatim
1.20 bertrand 103: *> NSHFTS is INTEGER
1.10 bertrand 104: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
105: *> must be positive and even.
106: *> \endverbatim
107: *>
108: *> \param[in,out] S
109: *> \verbatim
1.20 bertrand 110: *> S is COMPLEX*16 array, dimension (NSHFTS)
1.10 bertrand 111: *> S contains the shifts of origin that define the multi-
112: *> shift QR sweep. On output S may be reordered.
113: *> \endverbatim
114: *>
115: *> \param[in,out] H
116: *> \verbatim
1.20 bertrand 117: *> H is COMPLEX*16 array, dimension (LDH,N)
1.10 bertrand 118: *> On input H contains a Hessenberg matrix. On output a
119: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
120: *> to the isolated diagonal block in rows and columns KTOP
121: *> through KBOT.
122: *> \endverbatim
123: *>
124: *> \param[in] LDH
125: *> \verbatim
1.20 bertrand 126: *> LDH is INTEGER
1.10 bertrand 127: *> LDH is the leading dimension of H just as declared in the
128: *> calling procedure. LDH.GE.MAX(1,N).
129: *> \endverbatim
130: *>
131: *> \param[in] ILOZ
132: *> \verbatim
133: *> ILOZ is INTEGER
134: *> \endverbatim
135: *>
136: *> \param[in] IHIZ
137: *> \verbatim
138: *> IHIZ is INTEGER
139: *> Specify the rows of Z to which transformations must be
140: *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
141: *> \endverbatim
142: *>
143: *> \param[in,out] Z
144: *> \verbatim
1.20 bertrand 145: *> Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
1.10 bertrand 146: *> If WANTZ = .TRUE., then the QR Sweep unitary
147: *> similarity transformation is accumulated into
1.16 bertrand 148: *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
1.10 bertrand 149: *> If WANTZ = .FALSE., then Z is unreferenced.
150: *> \endverbatim
151: *>
152: *> \param[in] LDZ
153: *> \verbatim
1.20 bertrand 154: *> LDZ is INTEGER
1.10 bertrand 155: *> LDA is the leading dimension of Z just as declared in
156: *> the calling procedure. LDZ.GE.N.
157: *> \endverbatim
158: *>
159: *> \param[out] V
160: *> \verbatim
1.20 bertrand 161: *> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
1.10 bertrand 162: *> \endverbatim
163: *>
164: *> \param[in] LDV
165: *> \verbatim
1.20 bertrand 166: *> LDV is INTEGER
1.10 bertrand 167: *> LDV is the leading dimension of V as declared in the
168: *> calling procedure. LDV.GE.3.
169: *> \endverbatim
170: *>
171: *> \param[out] U
172: *> \verbatim
1.20 bertrand 173: *> U is COMPLEX*16 array, dimension (LDU,3*NSHFTS-3)
1.10 bertrand 174: *> \endverbatim
175: *>
176: *> \param[in] LDU
177: *> \verbatim
1.20 bertrand 178: *> LDU is INTEGER
1.10 bertrand 179: *> LDU is the leading dimension of U just as declared in the
180: *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
181: *> \endverbatim
182: *>
183: *> \param[in] NH
184: *> \verbatim
1.20 bertrand 185: *> NH is INTEGER
1.10 bertrand 186: *> NH is the number of columns in array WH available for
187: *> workspace. NH.GE.1.
188: *> \endverbatim
189: *>
190: *> \param[out] WH
191: *> \verbatim
1.20 bertrand 192: *> WH is COMPLEX*16 array, dimension (LDWH,NH)
1.10 bertrand 193: *> \endverbatim
194: *>
195: *> \param[in] LDWH
196: *> \verbatim
1.20 bertrand 197: *> LDWH is INTEGER
1.10 bertrand 198: *> Leading dimension of WH just as declared in the
199: *> calling procedure. LDWH.GE.3*NSHFTS-3.
200: *> \endverbatim
201: *>
202: *> \param[in] NV
203: *> \verbatim
1.20 bertrand 204: *> NV is INTEGER
1.10 bertrand 205: *> NV is the number of rows in WV agailable for workspace.
206: *> NV.GE.1.
207: *> \endverbatim
208: *>
209: *> \param[out] WV
210: *> \verbatim
1.20 bertrand 211: *> WV is COMPLEX*16 array, dimension (LDWV,3*NSHFTS-3)
1.10 bertrand 212: *> \endverbatim
213: *>
214: *> \param[in] LDWV
215: *> \verbatim
1.20 bertrand 216: *> LDWV is INTEGER
1.10 bertrand 217: *> LDWV is the leading dimension of WV as declared in the
218: *> in the calling subroutine. LDWV.GE.NV.
219: *> \endverbatim
220: *
221: * Authors:
222: * ========
223: *
1.18 bertrand 224: *> \author Univ. of Tennessee
225: *> \author Univ. of California Berkeley
226: *> \author Univ. of Colorado Denver
227: *> \author NAG Ltd.
1.10 bertrand 228: *
1.16 bertrand 229: *> \date June 2016
1.10 bertrand 230: *
231: *> \ingroup complex16OTHERauxiliary
232: *
233: *> \par Contributors:
234: * ==================
235: *>
236: *> Karen Braman and Ralph Byers, Department of Mathematics,
237: *> University of Kansas, USA
238: *
239: *> \par References:
240: * ================
241: *>
242: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
243: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
244: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
245: *> 929--947, 2002.
246: *>
247: * =====================================================================
1.1 bertrand 248: SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
249: $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
250: $ WV, LDWV, NH, WH, LDWH )
251: *
1.20 bertrand 252: * -- LAPACK auxiliary routine (version 3.7.1) --
1.10 bertrand 253: * -- LAPACK is a software package provided by Univ. of Tennessee, --
254: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.16 bertrand 255: * June 2016
1.1 bertrand 256: *
257: * .. Scalar Arguments ..
258: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
259: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
260: LOGICAL WANTT, WANTZ
261: * ..
262: * .. Array Arguments ..
263: COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
264: $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
265: * ..
266: *
1.10 bertrand 267: * ================================================================
1.1 bertrand 268: * .. Parameters ..
269: COMPLEX*16 ZERO, ONE
270: PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
271: $ ONE = ( 1.0d0, 0.0d0 ) )
272: DOUBLE PRECISION RZERO, RONE
273: PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 )
274: * ..
275: * .. Local Scalars ..
276: COMPLEX*16 ALPHA, BETA, CDUM, REFSUM
277: DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
278: $ SMLNUM, TST1, TST2, ULP
279: INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
280: $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
281: $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
282: $ NS, NU
283: LOGICAL ACCUM, BLK22, BMP22
284: * ..
285: * .. External Functions ..
286: DOUBLE PRECISION DLAMCH
287: EXTERNAL DLAMCH
288: * ..
289: * .. Intrinsic Functions ..
290: *
291: INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD
292: * ..
293: * .. Local Arrays ..
294: COMPLEX*16 VT( 3 )
295: * ..
296: * .. External Subroutines ..
297: EXTERNAL DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET,
298: $ ZTRMM
299: * ..
300: * .. Statement Functions ..
301: DOUBLE PRECISION CABS1
302: * ..
303: * .. Statement Function definitions ..
304: CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
305: * ..
306: * .. Executable Statements ..
307: *
308: * ==== If there are no shifts, then there is nothing to do. ====
309: *
310: IF( NSHFTS.LT.2 )
311: $ RETURN
312: *
313: * ==== If the active block is empty or 1-by-1, then there
314: * . is nothing to do. ====
315: *
316: IF( KTOP.GE.KBOT )
317: $ RETURN
318: *
319: * ==== NSHFTS is supposed to be even, but if it is odd,
320: * . then simply reduce it by one. ====
321: *
322: NS = NSHFTS - MOD( NSHFTS, 2 )
323: *
324: * ==== Machine constants for deflation ====
325: *
326: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
327: SAFMAX = RONE / SAFMIN
328: CALL DLABAD( SAFMIN, SAFMAX )
329: ULP = DLAMCH( 'PRECISION' )
330: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
331: *
332: * ==== Use accumulated reflections to update far-from-diagonal
333: * . entries ? ====
334: *
335: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
336: *
337: * ==== If so, exploit the 2-by-2 block structure? ====
338: *
339: BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
340: *
341: * ==== clear trash ====
342: *
343: IF( KTOP+2.LE.KBOT )
344: $ H( KTOP+2, KTOP ) = ZERO
345: *
346: * ==== NBMPS = number of 2-shift bulges in the chain ====
347: *
348: NBMPS = NS / 2
349: *
350: * ==== KDU = width of slab ====
351: *
352: KDU = 6*NBMPS - 3
353: *
354: * ==== Create and chase chains of NBMPS bulges ====
355: *
356: DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
357: NDCOL = INCOL + KDU
358: IF( ACCUM )
359: $ CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
360: *
361: * ==== Near-the-diagonal bulge chase. The following loop
362: * . performs the near-the-diagonal part of a small bulge
363: * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
364: * . chunk extends from column INCOL to column NDCOL
365: * . (including both column INCOL and column NDCOL). The
366: * . following loop chases a 3*NBMPS column long chain of
367: * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
368: * . may be less than KTOP and and NDCOL may be greater than
369: * . KBOT indicating phantom columns from which to chase
370: * . bulges before they are actually introduced or to which
371: * . to chase bulges beyond column KBOT.) ====
372: *
373: DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
374: *
375: * ==== Bulges number MTOP to MBOT are active double implicit
376: * . shift bulges. There may or may not also be small
377: * . 2-by-2 bulge, if there is room. The inactive bulges
378: * . (if any) must wait until the active bulges have moved
379: * . down the diagonal to make room. The phantom matrix
380: * . paradigm described above helps keep track. ====
381: *
382: MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
383: MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
384: M22 = MBOT + 1
385: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
386: $ ( KBOT-2 )
387: *
388: * ==== Generate reflections to chase the chain right
389: * . one column. (The minimum value of K is KTOP-1.) ====
390: *
391: DO 10 M = MTOP, MBOT
392: K = KRCOL + 3*( M-1 )
393: IF( K.EQ.KTOP-1 ) THEN
394: CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
395: $ S( 2*M ), V( 1, M ) )
396: ALPHA = V( 1, M )
397: CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
398: ELSE
399: BETA = H( K+1, K )
400: V( 2, M ) = H( K+2, K )
401: V( 3, M ) = H( K+3, K )
402: CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
403: *
404: * ==== A Bulge may collapse because of vigilant
405: * . deflation or destructive underflow. In the
406: * . underflow case, try the two-small-subdiagonals
407: * . trick to try to reinflate the bulge. ====
408: *
409: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
410: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
411: *
412: * ==== Typical case: not collapsed (yet). ====
413: *
414: H( K+1, K ) = BETA
415: H( K+2, K ) = ZERO
416: H( K+3, K ) = ZERO
417: ELSE
418: *
419: * ==== Atypical case: collapsed. Attempt to
420: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
421: * . If the fill resulting from the new
422: * . reflector is too large, then abandon it.
423: * . Otherwise, use the new one. ====
424: *
425: CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
426: $ S( 2*M ), VT )
427: ALPHA = VT( 1 )
428: CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
429: REFSUM = DCONJG( VT( 1 ) )*
430: $ ( H( K+1, K )+DCONJG( VT( 2 ) )*
431: $ H( K+2, K ) )
432: *
433: IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
434: $ CABS1( REFSUM*VT( 3 ) ).GT.ULP*
435: $ ( CABS1( H( K, K ) )+CABS1( H( K+1,
436: $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
437: *
438: * ==== Starting a new bulge here would
439: * . create non-negligible fill. Use
440: * . the old one with trepidation. ====
441: *
442: H( K+1, K ) = BETA
443: H( K+2, K ) = ZERO
444: H( K+3, K ) = ZERO
445: ELSE
446: *
447: * ==== Stating a new bulge here would
448: * . create only negligible fill.
449: * . Replace the old reflector with
450: * . the new one. ====
451: *
452: H( K+1, K ) = H( K+1, K ) - REFSUM
453: H( K+2, K ) = ZERO
454: H( K+3, K ) = ZERO
455: V( 1, M ) = VT( 1 )
456: V( 2, M ) = VT( 2 )
457: V( 3, M ) = VT( 3 )
458: END IF
459: END IF
460: END IF
461: 10 CONTINUE
462: *
463: * ==== Generate a 2-by-2 reflection, if needed. ====
464: *
465: K = KRCOL + 3*( M22-1 )
466: IF( BMP22 ) THEN
467: IF( K.EQ.KTOP-1 ) THEN
468: CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
469: $ S( 2*M22 ), V( 1, M22 ) )
470: BETA = V( 1, M22 )
471: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
472: ELSE
473: BETA = H( K+1, K )
474: V( 2, M22 ) = H( K+2, K )
475: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
476: H( K+1, K ) = BETA
477: H( K+2, K ) = ZERO
478: END IF
479: END IF
480: *
481: * ==== Multiply H by reflections from the left ====
482: *
483: IF( ACCUM ) THEN
484: JBOT = MIN( NDCOL, KBOT )
485: ELSE IF( WANTT ) THEN
486: JBOT = N
487: ELSE
488: JBOT = KBOT
489: END IF
490: DO 30 J = MAX( KTOP, KRCOL ), JBOT
491: MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
492: DO 20 M = MTOP, MEND
493: K = KRCOL + 3*( M-1 )
494: REFSUM = DCONJG( V( 1, M ) )*
495: $ ( H( K+1, J )+DCONJG( V( 2, M ) )*
496: $ H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) )
497: H( K+1, J ) = H( K+1, J ) - REFSUM
498: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
499: H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
500: 20 CONTINUE
501: 30 CONTINUE
502: IF( BMP22 ) THEN
503: K = KRCOL + 3*( M22-1 )
504: DO 40 J = MAX( K+1, KTOP ), JBOT
505: REFSUM = DCONJG( V( 1, M22 ) )*
506: $ ( H( K+1, J )+DCONJG( V( 2, M22 ) )*
507: $ H( K+2, J ) )
508: H( K+1, J ) = H( K+1, J ) - REFSUM
509: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
510: 40 CONTINUE
511: END IF
512: *
513: * ==== Multiply H by reflections from the right.
514: * . Delay filling in the last row until the
515: * . vigilant deflation check is complete. ====
516: *
517: IF( ACCUM ) THEN
518: JTOP = MAX( KTOP, INCOL )
519: ELSE IF( WANTT ) THEN
520: JTOP = 1
521: ELSE
522: JTOP = KTOP
523: END IF
524: DO 80 M = MTOP, MBOT
525: IF( V( 1, M ).NE.ZERO ) THEN
526: K = KRCOL + 3*( M-1 )
527: DO 50 J = JTOP, MIN( KBOT, K+3 )
528: REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
529: $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
530: H( J, K+1 ) = H( J, K+1 ) - REFSUM
531: H( J, K+2 ) = H( J, K+2 ) -
532: $ REFSUM*DCONJG( V( 2, M ) )
533: H( J, K+3 ) = H( J, K+3 ) -
534: $ REFSUM*DCONJG( V( 3, M ) )
535: 50 CONTINUE
536: *
537: IF( ACCUM ) THEN
538: *
539: * ==== Accumulate U. (If necessary, update Z later
540: * . with with an efficient matrix-matrix
541: * . multiply.) ====
542: *
543: KMS = K - INCOL
544: DO 60 J = MAX( 1, KTOP-INCOL ), KDU
545: REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
546: $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
547: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
548: U( J, KMS+2 ) = U( J, KMS+2 ) -
549: $ REFSUM*DCONJG( V( 2, M ) )
550: U( J, KMS+3 ) = U( J, KMS+3 ) -
551: $ REFSUM*DCONJG( V( 3, M ) )
552: 60 CONTINUE
553: ELSE IF( WANTZ ) THEN
554: *
555: * ==== U is not accumulated, so update Z
556: * . now by multiplying by reflections
557: * . from the right. ====
558: *
559: DO 70 J = ILOZ, IHIZ
560: REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
561: $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
562: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
563: Z( J, K+2 ) = Z( J, K+2 ) -
564: $ REFSUM*DCONJG( V( 2, M ) )
565: Z( J, K+3 ) = Z( J, K+3 ) -
566: $ REFSUM*DCONJG( V( 3, M ) )
567: 70 CONTINUE
568: END IF
569: END IF
570: 80 CONTINUE
571: *
572: * ==== Special case: 2-by-2 reflection (if needed) ====
573: *
574: K = KRCOL + 3*( M22-1 )
1.7 bertrand 575: IF( BMP22 ) THEN
576: IF ( V( 1, M22 ).NE.ZERO ) THEN
577: DO 90 J = JTOP, MIN( KBOT, K+3 )
578: REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
579: $ H( J, K+2 ) )
580: H( J, K+1 ) = H( J, K+1 ) - REFSUM
581: H( J, K+2 ) = H( J, K+2 ) -
1.1 bertrand 582: $ REFSUM*DCONJG( V( 2, M22 ) )
1.7 bertrand 583: 90 CONTINUE
584: *
585: IF( ACCUM ) THEN
586: KMS = K - INCOL
587: DO 100 J = MAX( 1, KTOP-INCOL ), KDU
588: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
589: $ V( 2, M22 )*U( J, KMS+2 ) )
590: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
591: U( J, KMS+2 ) = U( J, KMS+2 ) -
592: $ REFSUM*DCONJG( V( 2, M22 ) )
593: 100 CONTINUE
594: ELSE IF( WANTZ ) THEN
595: DO 110 J = ILOZ, IHIZ
596: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
597: $ Z( J, K+2 ) )
598: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
599: Z( J, K+2 ) = Z( J, K+2 ) -
600: $ REFSUM*DCONJG( V( 2, M22 ) )
601: 110 CONTINUE
602: END IF
1.1 bertrand 603: END IF
604: END IF
605: *
606: * ==== Vigilant deflation check ====
607: *
608: MSTART = MTOP
609: IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
610: $ MSTART = MSTART + 1
611: MEND = MBOT
612: IF( BMP22 )
613: $ MEND = MEND + 1
614: IF( KRCOL.EQ.KBOT-2 )
615: $ MEND = MEND + 1
616: DO 120 M = MSTART, MEND
617: K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
618: *
619: * ==== The following convergence test requires that
620: * . the tradition small-compared-to-nearby-diagonals
621: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
622: * . criteria both be satisfied. The latter improves
623: * . accuracy in some examples. Falling back on an
624: * . alternate convergence criterion when TST1 or TST2
625: * . is zero (as done here) is traditional but probably
626: * . unnecessary. ====
627: *
628: IF( H( K+1, K ).NE.ZERO ) THEN
629: TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
630: IF( TST1.EQ.RZERO ) THEN
631: IF( K.GE.KTOP+1 )
632: $ TST1 = TST1 + CABS1( H( K, K-1 ) )
633: IF( K.GE.KTOP+2 )
634: $ TST1 = TST1 + CABS1( H( K, K-2 ) )
635: IF( K.GE.KTOP+3 )
636: $ TST1 = TST1 + CABS1( H( K, K-3 ) )
637: IF( K.LE.KBOT-2 )
638: $ TST1 = TST1 + CABS1( H( K+2, K+1 ) )
639: IF( K.LE.KBOT-3 )
640: $ TST1 = TST1 + CABS1( H( K+3, K+1 ) )
641: IF( K.LE.KBOT-4 )
642: $ TST1 = TST1 + CABS1( H( K+4, K+1 ) )
643: END IF
644: IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
645: $ THEN
646: H12 = MAX( CABS1( H( K+1, K ) ),
647: $ CABS1( H( K, K+1 ) ) )
648: H21 = MIN( CABS1( H( K+1, K ) ),
649: $ CABS1( H( K, K+1 ) ) )
650: H11 = MAX( CABS1( H( K+1, K+1 ) ),
651: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
652: H22 = MIN( CABS1( H( K+1, K+1 ) ),
653: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
654: SCL = H11 + H12
655: TST2 = H22*( H11 / SCL )
656: *
657: IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
658: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
659: END IF
660: END IF
661: 120 CONTINUE
662: *
663: * ==== Fill in the last row of each bulge. ====
664: *
665: MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
666: DO 130 M = MTOP, MEND
667: K = KRCOL + 3*( M-1 )
668: REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
669: H( K+4, K+1 ) = -REFSUM
670: H( K+4, K+2 ) = -REFSUM*DCONJG( V( 2, M ) )
671: H( K+4, K+3 ) = H( K+4, K+3 ) -
672: $ REFSUM*DCONJG( V( 3, M ) )
673: 130 CONTINUE
674: *
675: * ==== End of near-the-diagonal bulge chase. ====
676: *
677: 140 CONTINUE
678: *
679: * ==== Use U (if accumulated) to update far-from-diagonal
680: * . entries in H. If required, use U to update Z as
681: * . well. ====
682: *
683: IF( ACCUM ) THEN
684: IF( WANTT ) THEN
685: JTOP = 1
686: JBOT = N
687: ELSE
688: JTOP = KTOP
689: JBOT = KBOT
690: END IF
691: IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
692: $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
693: *
694: * ==== Updates not exploiting the 2-by-2 block
695: * . structure of U. K1 and NU keep track of
696: * . the location and size of U in the special
697: * . cases of introducing bulges and chasing
698: * . bulges off the bottom. In these special
699: * . cases and in case the number of shifts
700: * . is NS = 2, there is no 2-by-2 block
701: * . structure to exploit. ====
702: *
703: K1 = MAX( 1, KTOP-INCOL )
704: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
705: *
706: * ==== Horizontal Multiply ====
707: *
708: DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
709: JLEN = MIN( NH, JBOT-JCOL+1 )
710: CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
711: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
712: $ LDWH )
713: CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH,
714: $ H( INCOL+K1, JCOL ), LDH )
715: 150 CONTINUE
716: *
717: * ==== Vertical multiply ====
718: *
719: DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
720: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
721: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
722: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
723: $ LDU, ZERO, WV, LDWV )
724: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
725: $ H( JROW, INCOL+K1 ), LDH )
726: 160 CONTINUE
727: *
728: * ==== Z multiply (also vertical) ====
729: *
730: IF( WANTZ ) THEN
731: DO 170 JROW = ILOZ, IHIZ, NV
732: JLEN = MIN( NV, IHIZ-JROW+1 )
733: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
734: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
735: $ LDU, ZERO, WV, LDWV )
736: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
737: $ Z( JROW, INCOL+K1 ), LDZ )
738: 170 CONTINUE
739: END IF
740: ELSE
741: *
742: * ==== Updates exploiting U's 2-by-2 block structure.
743: * . (I2, I4, J2, J4 are the last rows and columns
744: * . of the blocks.) ====
745: *
746: I2 = ( KDU+1 ) / 2
747: I4 = KDU
748: J2 = I4 - I2
749: J4 = KDU
750: *
751: * ==== KZS and KNZ deal with the band of zeros
752: * . along the diagonal of one of the triangular
753: * . blocks. ====
754: *
755: KZS = ( J4-J2 ) - ( NS+1 )
756: KNZ = NS + 1
757: *
758: * ==== Horizontal multiply ====
759: *
760: DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
761: JLEN = MIN( NH, JBOT-JCOL+1 )
762: *
763: * ==== Copy bottom of H to top+KZS of scratch ====
764: * (The first KZS rows get multiplied by zero.) ====
765: *
766: CALL ZLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
767: $ LDH, WH( KZS+1, 1 ), LDWH )
768: *
1.9 bertrand 769: * ==== Multiply by U21**H ====
1.1 bertrand 770: *
771: CALL ZLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
772: CALL ZTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
773: $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
774: $ LDWH )
775: *
1.9 bertrand 776: * ==== Multiply top of H by U11**H ====
1.1 bertrand 777: *
778: CALL ZGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
779: $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
780: *
781: * ==== Copy top of H to bottom of WH ====
782: *
783: CALL ZLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
784: $ WH( I2+1, 1 ), LDWH )
785: *
1.9 bertrand 786: * ==== Multiply by U21**H ====
1.1 bertrand 787: *
788: CALL ZTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
789: $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
790: *
791: * ==== Multiply by U22 ====
792: *
793: CALL ZGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
794: $ U( J2+1, I2+1 ), LDU,
795: $ H( INCOL+1+J2, JCOL ), LDH, ONE,
796: $ WH( I2+1, 1 ), LDWH )
797: *
798: * ==== Copy it back ====
799: *
800: CALL ZLACPY( 'ALL', KDU, JLEN, WH, LDWH,
801: $ H( INCOL+1, JCOL ), LDH )
802: 180 CONTINUE
803: *
804: * ==== Vertical multiply ====
805: *
806: DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
807: JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
808: *
809: * ==== Copy right of H to scratch (the first KZS
810: * . columns get multiplied by zero) ====
811: *
812: CALL ZLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
813: $ LDH, WV( 1, 1+KZS ), LDWV )
814: *
815: * ==== Multiply by U21 ====
816: *
817: CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
818: CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
819: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
820: $ LDWV )
821: *
822: * ==== Multiply by U11 ====
823: *
824: CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
825: $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
826: $ LDWV )
827: *
828: * ==== Copy left of H to right of scratch ====
829: *
830: CALL ZLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
831: $ WV( 1, 1+I2 ), LDWV )
832: *
833: * ==== Multiply by U21 ====
834: *
835: CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
836: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
837: *
838: * ==== Multiply by U22 ====
839: *
840: CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
841: $ H( JROW, INCOL+1+J2 ), LDH,
842: $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
843: $ LDWV )
844: *
845: * ==== Copy it back ====
846: *
847: CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
848: $ H( JROW, INCOL+1 ), LDH )
849: 190 CONTINUE
850: *
851: * ==== Multiply Z (also vertical) ====
852: *
853: IF( WANTZ ) THEN
854: DO 200 JROW = ILOZ, IHIZ, NV
855: JLEN = MIN( NV, IHIZ-JROW+1 )
856: *
857: * ==== Copy right of Z to left of scratch (first
858: * . KZS columns get multiplied by zero) ====
859: *
860: CALL ZLACPY( 'ALL', JLEN, KNZ,
861: $ Z( JROW, INCOL+1+J2 ), LDZ,
862: $ WV( 1, 1+KZS ), LDWV )
863: *
864: * ==== Multiply by U12 ====
865: *
866: CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
867: $ LDWV )
868: CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
869: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
870: $ LDWV )
871: *
872: * ==== Multiply by U11 ====
873: *
874: CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
875: $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
876: $ WV, LDWV )
877: *
878: * ==== Copy left of Z to right of scratch ====
879: *
880: CALL ZLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
881: $ LDZ, WV( 1, 1+I2 ), LDWV )
882: *
883: * ==== Multiply by U21 ====
884: *
885: CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
886: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
887: $ LDWV )
888: *
889: * ==== Multiply by U22 ====
890: *
891: CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
892: $ Z( JROW, INCOL+1+J2 ), LDZ,
893: $ U( J2+1, I2+1 ), LDU, ONE,
894: $ WV( 1, 1+I2 ), LDWV )
895: *
896: * ==== Copy the result back to Z ====
897: *
898: CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
899: $ Z( JROW, INCOL+1 ), LDZ )
900: 200 CONTINUE
901: END IF
902: END IF
903: END IF
904: 210 CONTINUE
905: *
906: * ==== End of ZLAQR5 ====
907: *
908: END
CVSweb interface <joel.bertrand@systella.fr>