1: *> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZLAQR5 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22: * H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23: * WV, LDWV, NH, WH, LDWH )
24: *
25: * .. Scalar Arguments ..
26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28: * LOGICAL WANTT, WANTZ
29: * ..
30: * .. Array Arguments ..
31: * COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32: * $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33: * ..
34: *
35: *
36: *> \par Purpose:
37: * =============
38: *>
39: *> \verbatim
40: *>
41: *> ZLAQR5, called by ZLAQR0, performs a
42: *> single small-bulge multi-shift QR sweep.
43: *> \endverbatim
44: *
45: * Arguments:
46: * ==========
47: *
48: *> \param[in] WANTT
49: *> \verbatim
50: *> WANTT is LOGICAL
51: *> WANTT = .true. if the triangular Schur factor
52: *> is being computed. WANTT is set to .false. otherwise.
53: *> \endverbatim
54: *>
55: *> \param[in] WANTZ
56: *> \verbatim
57: *> WANTZ is LOGICAL
58: *> WANTZ = .true. if the unitary Schur factor is being
59: *> computed. WANTZ is set to .false. otherwise.
60: *> \endverbatim
61: *>
62: *> \param[in] KACC22
63: *> \verbatim
64: *> KACC22 is INTEGER with value 0, 1, or 2.
65: *> Specifies the computation mode of far-from-diagonal
66: *> orthogonal updates.
67: *> = 0: ZLAQR5 does not accumulate reflections and does not
68: *> use matrix-matrix multiply to update far-from-diagonal
69: *> matrix entries.
70: *> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71: *> multiply to update the far-from-diagonal matrix entries.
72: *> = 2: Same as KACC22 = 1. This option used to enable exploiting
73: *> the 2-by-2 structure during matrix multiplications, but
74: *> this is no longer supported.
75: *> \endverbatim
76: *>
77: *> \param[in] N
78: *> \verbatim
79: *> N is INTEGER
80: *> N is the order of the Hessenberg matrix H upon which this
81: *> subroutine operates.
82: *> \endverbatim
83: *>
84: *> \param[in] KTOP
85: *> \verbatim
86: *> KTOP is INTEGER
87: *> \endverbatim
88: *>
89: *> \param[in] KBOT
90: *> \verbatim
91: *> KBOT is INTEGER
92: *> These are the first and last rows and columns of an
93: *> isolated diagonal block upon which the QR sweep is to be
94: *> applied. It is assumed without a check that
95: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
96: *> and
97: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
98: *> \endverbatim
99: *>
100: *> \param[in] NSHFTS
101: *> \verbatim
102: *> NSHFTS is INTEGER
103: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
104: *> must be positive and even.
105: *> \endverbatim
106: *>
107: *> \param[in,out] S
108: *> \verbatim
109: *> S is COMPLEX*16 array, dimension (NSHFTS)
110: *> S contains the shifts of origin that define the multi-
111: *> shift QR sweep. On output S may be reordered.
112: *> \endverbatim
113: *>
114: *> \param[in,out] H
115: *> \verbatim
116: *> H is COMPLEX*16 array, dimension (LDH,N)
117: *> On input H contains a Hessenberg matrix. On output a
118: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
119: *> to the isolated diagonal block in rows and columns KTOP
120: *> through KBOT.
121: *> \endverbatim
122: *>
123: *> \param[in] LDH
124: *> \verbatim
125: *> LDH is INTEGER
126: *> LDH is the leading dimension of H just as declared in the
127: *> calling procedure. LDH >= MAX(1,N).
128: *> \endverbatim
129: *>
130: *> \param[in] ILOZ
131: *> \verbatim
132: *> ILOZ is INTEGER
133: *> \endverbatim
134: *>
135: *> \param[in] IHIZ
136: *> \verbatim
137: *> IHIZ is INTEGER
138: *> Specify the rows of Z to which transformations must be
139: *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
140: *> \endverbatim
141: *>
142: *> \param[in,out] Z
143: *> \verbatim
144: *> Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
145: *> If WANTZ = .TRUE., then the QR Sweep unitary
146: *> similarity transformation is accumulated into
147: *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
148: *> If WANTZ = .FALSE., then Z is unreferenced.
149: *> \endverbatim
150: *>
151: *> \param[in] LDZ
152: *> \verbatim
153: *> LDZ is INTEGER
154: *> LDA is the leading dimension of Z just as declared in
155: *> the calling procedure. LDZ >= N.
156: *> \endverbatim
157: *>
158: *> \param[out] V
159: *> \verbatim
160: *> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
161: *> \endverbatim
162: *>
163: *> \param[in] LDV
164: *> \verbatim
165: *> LDV is INTEGER
166: *> LDV is the leading dimension of V as declared in the
167: *> calling procedure. LDV >= 3.
168: *> \endverbatim
169: *>
170: *> \param[out] U
171: *> \verbatim
172: *> U is COMPLEX*16 array, dimension (LDU,2*NSHFTS)
173: *> \endverbatim
174: *>
175: *> \param[in] LDU
176: *> \verbatim
177: *> LDU is INTEGER
178: *> LDU is the leading dimension of U just as declared in the
179: *> in the calling subroutine. LDU >= 2*NSHFTS.
180: *> \endverbatim
181: *>
182: *> \param[in] NV
183: *> \verbatim
184: *> NV is INTEGER
185: *> NV is the number of rows in WV agailable for workspace.
186: *> NV >= 1.
187: *> \endverbatim
188: *>
189: *> \param[out] WV
190: *> \verbatim
191: *> WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS)
192: *> \endverbatim
193: *>
194: *> \param[in] LDWV
195: *> \verbatim
196: *> LDWV is INTEGER
197: *> LDWV is the leading dimension of WV as declared in the
198: *> in the calling subroutine. LDWV >= NV.
199: *> \endverbatim
200: *
201: *> \param[in] NH
202: *> \verbatim
203: *> NH is INTEGER
204: *> NH is the number of columns in array WH available for
205: *> workspace. NH >= 1.
206: *> \endverbatim
207: *>
208: *> \param[out] WH
209: *> \verbatim
210: *> WH is COMPLEX*16 array, dimension (LDWH,NH)
211: *> \endverbatim
212: *>
213: *> \param[in] LDWH
214: *> \verbatim
215: *> LDWH is INTEGER
216: *> Leading dimension of WH just as declared in the
217: *> calling procedure. LDWH >= 2*NSHFTS.
218: *> \endverbatim
219: *>
220: * Authors:
221: * ========
222: *
223: *> \author Univ. of Tennessee
224: *> \author Univ. of California Berkeley
225: *> \author Univ. of Colorado Denver
226: *> \author NAG Ltd.
227: *
228: *> \ingroup complex16OTHERauxiliary
229: *
230: *> \par Contributors:
231: * ==================
232: *>
233: *> Karen Braman and Ralph Byers, Department of Mathematics,
234: *> University of Kansas, USA
235: *>
236: *> Lars Karlsson, Daniel Kressner, and Bruno Lang
237: *>
238: *> Thijs Steel, Department of Computer science,
239: *> KU Leuven, Belgium
240: *
241: *> \par References:
242: * ================
243: *>
244: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247: *> 929--947, 2002.
248: *>
249: *> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
250: *> chains of bulges in multishift QR algorithms.
251: *> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
252: *>
253: * =====================================================================
254: SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
255: $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
256: $ WV, LDWV, NH, WH, LDWH )
257: IMPLICIT NONE
258: *
259: * -- LAPACK auxiliary routine --
260: * -- LAPACK is a software package provided by Univ. of Tennessee, --
261: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
262: *
263: * .. Scalar Arguments ..
264: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
265: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
266: LOGICAL WANTT, WANTZ
267: * ..
268: * .. Array Arguments ..
269: COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
270: $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
271: * ..
272: *
273: * ================================================================
274: * .. Parameters ..
275: COMPLEX*16 ZERO, ONE
276: PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
277: $ ONE = ( 1.0d0, 0.0d0 ) )
278: DOUBLE PRECISION RZERO, RONE
279: PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 )
280: * ..
281: * .. Local Scalars ..
282: COMPLEX*16 ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
283: DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
284: $ SMLNUM, TST1, TST2, ULP
285: INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
286: $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
287: $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
288: $ NS, NU
289: LOGICAL ACCUM, BMP22
290: * ..
291: * .. External Functions ..
292: DOUBLE PRECISION DLAMCH
293: EXTERNAL DLAMCH
294: * ..
295: * .. Intrinsic Functions ..
296: *
297: INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD
298: * ..
299: * .. Local Arrays ..
300: COMPLEX*16 VT( 3 )
301: * ..
302: * .. External Subroutines ..
303: EXTERNAL DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET,
304: $ ZTRMM
305: * ..
306: * .. Statement Functions ..
307: DOUBLE PRECISION CABS1
308: * ..
309: * .. Statement Function definitions ..
310: CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
311: * ..
312: * .. Executable Statements ..
313: *
314: * ==== If there are no shifts, then there is nothing to do. ====
315: *
316: IF( NSHFTS.LT.2 )
317: $ RETURN
318: *
319: * ==== If the active block is empty or 1-by-1, then there
320: * . is nothing to do. ====
321: *
322: IF( KTOP.GE.KBOT )
323: $ RETURN
324: *
325: * ==== NSHFTS is supposed to be even, but if it is odd,
326: * . then simply reduce it by one. ====
327: *
328: NS = NSHFTS - MOD( NSHFTS, 2 )
329: *
330: * ==== Machine constants for deflation ====
331: *
332: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
333: SAFMAX = RONE / SAFMIN
334: CALL DLABAD( SAFMIN, SAFMAX )
335: ULP = DLAMCH( 'PRECISION' )
336: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
337: *
338: * ==== Use accumulated reflections to update far-from-diagonal
339: * . entries ? ====
340: *
341: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
342: *
343: * ==== clear trash ====
344: *
345: IF( KTOP+2.LE.KBOT )
346: $ H( KTOP+2, KTOP ) = ZERO
347: *
348: * ==== NBMPS = number of 2-shift bulges in the chain ====
349: *
350: NBMPS = NS / 2
351: *
352: * ==== KDU = width of slab ====
353: *
354: KDU = 4*NBMPS
355: *
356: * ==== Create and chase chains of NBMPS bulges ====
357: *
358: DO 180 INCOL = KTOP - 2*NBMPS + 1, KBOT - 2, 2*NBMPS
359: *
360: * JTOP = Index from which updates from the right start.
361: *
362: IF( ACCUM ) THEN
363: JTOP = MAX( KTOP, INCOL )
364: ELSE IF( WANTT ) THEN
365: JTOP = 1
366: ELSE
367: JTOP = KTOP
368: END IF
369: *
370: NDCOL = INCOL + KDU
371: IF( ACCUM )
372: $ CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
373: *
374: * ==== Near-the-diagonal bulge chase. The following loop
375: * . performs the near-the-diagonal part of a small bulge
376: * . multi-shift QR sweep. Each 4*NBMPS column diagonal
377: * . chunk extends from column INCOL to column NDCOL
378: * . (including both column INCOL and column NDCOL). The
379: * . following loop chases a 2*NBMPS+1 column long chain of
380: * . NBMPS bulges 2*NBMPS columns to the right. (INCOL
381: * . may be less than KTOP and and NDCOL may be greater than
382: * . KBOT indicating phantom columns from which to chase
383: * . bulges before they are actually introduced or to which
384: * . to chase bulges beyond column KBOT.) ====
385: *
386: DO 145 KRCOL = INCOL, MIN( INCOL+2*NBMPS-1, KBOT-2 )
387: *
388: * ==== Bulges number MTOP to MBOT are active double implicit
389: * . shift bulges. There may or may not also be small
390: * . 2-by-2 bulge, if there is room. The inactive bulges
391: * . (if any) must wait until the active bulges have moved
392: * . down the diagonal to make room. The phantom matrix
393: * . paradigm described above helps keep track. ====
394: *
395: MTOP = MAX( 1, ( KTOP-KRCOL ) / 2+1 )
396: MBOT = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 2 )
397: M22 = MBOT + 1
398: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+2*( M22-1 ) ).EQ.
399: $ ( KBOT-2 )
400: *
401: * ==== Generate reflections to chase the chain right
402: * . one column. (The minimum value of K is KTOP-1.) ====
403: *
404: IF ( BMP22 ) THEN
405: *
406: * ==== Special case: 2-by-2 reflection at bottom treated
407: * . separately ====
408: *
409: K = KRCOL + 2*( M22-1 )
410: IF( K.EQ.KTOP-1 ) THEN
411: CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
412: $ S( 2*M22 ), V( 1, M22 ) )
413: BETA = V( 1, M22 )
414: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
415: ELSE
416: BETA = H( K+1, K )
417: V( 2, M22 ) = H( K+2, K )
418: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
419: H( K+1, K ) = BETA
420: H( K+2, K ) = ZERO
421: END IF
422:
423: *
424: * ==== Perform update from right within
425: * . computational window. ====
426: *
427: T1 = V( 1, M22 )
428: T2 = T1*DCONJG( V( 2, M22 ) )
429: DO 30 J = JTOP, MIN( KBOT, K+3 )
430: REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
431: H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
432: H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
433: 30 CONTINUE
434: *
435: * ==== Perform update from left within
436: * . computational window. ====
437: *
438: IF( ACCUM ) THEN
439: JBOT = MIN( NDCOL, KBOT )
440: ELSE IF( WANTT ) THEN
441: JBOT = N
442: ELSE
443: JBOT = KBOT
444: END IF
445: T1 = DCONJG( V( 1, M22 ) )
446: T2 = T1*V( 2, M22 )
447: DO 40 J = K+1, JBOT
448: REFSUM = H( K+1, J ) +
449: $ DCONJG( V( 2, M22 ) )*H( K+2, J )
450: H( K+1, J ) = H( K+1, J ) - REFSUM*T1
451: H( K+2, J ) = H( K+2, J ) - REFSUM*T2
452: 40 CONTINUE
453: *
454: * ==== The following convergence test requires that
455: * . the tradition small-compared-to-nearby-diagonals
456: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
457: * . criteria both be satisfied. The latter improves
458: * . accuracy in some examples. Falling back on an
459: * . alternate convergence criterion when TST1 or TST2
460: * . is zero (as done here) is traditional but probably
461: * . unnecessary. ====
462: *
463: IF( K.GE.KTOP ) THEN
464: IF( H( K+1, K ).NE.ZERO ) THEN
465: TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
466: IF( TST1.EQ.RZERO ) THEN
467: IF( K.GE.KTOP+1 )
468: $ TST1 = TST1 + CABS1( H( K, K-1 ) )
469: IF( K.GE.KTOP+2 )
470: $ TST1 = TST1 + CABS1( H( K, K-2 ) )
471: IF( K.GE.KTOP+3 )
472: $ TST1 = TST1 + CABS1( H( K, K-3 ) )
473: IF( K.LE.KBOT-2 )
474: $ TST1 = TST1 + CABS1( H( K+2, K+1 ) )
475: IF( K.LE.KBOT-3 )
476: $ TST1 = TST1 + CABS1( H( K+3, K+1 ) )
477: IF( K.LE.KBOT-4 )
478: $ TST1 = TST1 + CABS1( H( K+4, K+1 ) )
479: END IF
480: IF( CABS1( H( K+1, K ) )
481: $ .LE.MAX( SMLNUM, ULP*TST1 ) ) THEN
482: H12 = MAX( CABS1( H( K+1, K ) ),
483: $ CABS1( H( K, K+1 ) ) )
484: H21 = MIN( CABS1( H( K+1, K ) ),
485: $ CABS1( H( K, K+1 ) ) )
486: H11 = MAX( CABS1( H( K+1, K+1 ) ),
487: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
488: H22 = MIN( CABS1( H( K+1, K+1 ) ),
489: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
490: SCL = H11 + H12
491: TST2 = H22*( H11 / SCL )
492: *
493: IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
494: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
495: END IF
496: END IF
497: END IF
498: *
499: * ==== Accumulate orthogonal transformations. ====
500: *
501: IF( ACCUM ) THEN
502: KMS = K - INCOL
503: DO 50 J = MAX( 1, KTOP-INCOL ), KDU
504: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
505: $ V( 2, M22 )*U( J, KMS+2 ) )
506: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
507: U( J, KMS+2 ) = U( J, KMS+2 ) -
508: $ REFSUM*DCONJG( V( 2, M22 ) )
509: 50 CONTINUE
510: ELSE IF( WANTZ ) THEN
511: DO 60 J = ILOZ, IHIZ
512: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
513: $ Z( J, K+2 ) )
514: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
515: Z( J, K+2 ) = Z( J, K+2 ) -
516: $ REFSUM*DCONJG( V( 2, M22 ) )
517: 60 CONTINUE
518: END IF
519: END IF
520: *
521: * ==== Normal case: Chain of 3-by-3 reflections ====
522: *
523: DO 80 M = MBOT, MTOP, -1
524: K = KRCOL + 2*( M-1 )
525: IF( K.EQ.KTOP-1 ) THEN
526: CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
527: $ S( 2*M ), V( 1, M ) )
528: ALPHA = V( 1, M )
529: CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
530: ELSE
531: *
532: * ==== Perform delayed transformation of row below
533: * . Mth bulge. Exploit fact that first two elements
534: * . of row are actually zero. ====
535: *
536: REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 )
537: H( K+3, K ) = -REFSUM
538: H( K+3, K+1 ) = -REFSUM*DCONJG( V( 2, M ) )
539: H( K+3, K+2 ) = H( K+3, K+2 ) -
540: $ REFSUM*DCONJG( V( 3, M ) )
541: *
542: * ==== Calculate reflection to move
543: * . Mth bulge one step. ====
544: *
545: BETA = H( K+1, K )
546: V( 2, M ) = H( K+2, K )
547: V( 3, M ) = H( K+3, K )
548: CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
549: *
550: * ==== A Bulge may collapse because of vigilant
551: * . deflation or destructive underflow. In the
552: * . underflow case, try the two-small-subdiagonals
553: * . trick to try to reinflate the bulge. ====
554: *
555: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
556: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
557: *
558: * ==== Typical case: not collapsed (yet). ====
559: *
560: H( K+1, K ) = BETA
561: H( K+2, K ) = ZERO
562: H( K+3, K ) = ZERO
563: ELSE
564: *
565: * ==== Atypical case: collapsed. Attempt to
566: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
567: * . If the fill resulting from the new
568: * . reflector is too large, then abandon it.
569: * . Otherwise, use the new one. ====
570: *
571: CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
572: $ S( 2*M ), VT )
573: ALPHA = VT( 1 )
574: CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
575: REFSUM = DCONJG( VT( 1 ) )*
576: $ ( H( K+1, K )+DCONJG( VT( 2 ) )*
577: $ H( K+2, K ) )
578: *
579: IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
580: $ CABS1( REFSUM*VT( 3 ) ).GT.ULP*
581: $ ( CABS1( H( K, K ) )+CABS1( H( K+1,
582: $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
583: *
584: * ==== Starting a new bulge here would
585: * . create non-negligible fill. Use
586: * . the old one with trepidation. ====
587: *
588: H( K+1, K ) = BETA
589: H( K+2, K ) = ZERO
590: H( K+3, K ) = ZERO
591: ELSE
592: *
593: * ==== Starting a new bulge here would
594: * . create only negligible fill.
595: * . Replace the old reflector with
596: * . the new one. ====
597: *
598: H( K+1, K ) = H( K+1, K ) - REFSUM
599: H( K+2, K ) = ZERO
600: H( K+3, K ) = ZERO
601: V( 1, M ) = VT( 1 )
602: V( 2, M ) = VT( 2 )
603: V( 3, M ) = VT( 3 )
604: END IF
605: END IF
606: END IF
607: *
608: * ==== Apply reflection from the right and
609: * . the first column of update from the left.
610: * . These updates are required for the vigilant
611: * . deflation check. We still delay most of the
612: * . updates from the left for efficiency. ====
613: *
614: T1 = V( 1, M )
615: T2 = T1*DCONJG( V( 2, M ) )
616: T3 = T1*DCONJG( V( 3, M ) )
617: DO 70 J = JTOP, MIN( KBOT, K+3 )
618: REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
619: $ + V( 3, M )*H( J, K+3 )
620: H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
621: H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
622: H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
623: 70 CONTINUE
624: *
625: * ==== Perform update from left for subsequent
626: * . column. ====
627: *
628: T1 = DCONJG( V( 1, M ) )
629: T2 = T1*V( 2, M )
630: T3 = T1*V( 3, M )
631: REFSUM = H( K+1, K+1 )
632: $ + DCONJG( V( 2, M ) )*H( K+2, K+1 )
633: $ + DCONJG( V( 3, M ) )*H( K+3, K+1 )
634: H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
635: H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
636: H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
637: *
638: * ==== The following convergence test requires that
639: * . the tradition small-compared-to-nearby-diagonals
640: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
641: * . criteria both be satisfied. The latter improves
642: * . accuracy in some examples. Falling back on an
643: * . alternate convergence criterion when TST1 or TST2
644: * . is zero (as done here) is traditional but probably
645: * . unnecessary. ====
646: *
647: IF( K.LT.KTOP)
648: $ CYCLE
649: IF( H( K+1, K ).NE.ZERO ) THEN
650: TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
651: IF( TST1.EQ.RZERO ) THEN
652: IF( K.GE.KTOP+1 )
653: $ TST1 = TST1 + CABS1( H( K, K-1 ) )
654: IF( K.GE.KTOP+2 )
655: $ TST1 = TST1 + CABS1( H( K, K-2 ) )
656: IF( K.GE.KTOP+3 )
657: $ TST1 = TST1 + CABS1( H( K, K-3 ) )
658: IF( K.LE.KBOT-2 )
659: $ TST1 = TST1 + CABS1( H( K+2, K+1 ) )
660: IF( K.LE.KBOT-3 )
661: $ TST1 = TST1 + CABS1( H( K+3, K+1 ) )
662: IF( K.LE.KBOT-4 )
663: $ TST1 = TST1 + CABS1( H( K+4, K+1 ) )
664: END IF
665: IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
666: $ THEN
667: H12 = MAX( CABS1( H( K+1, K ) ),
668: $ CABS1( H( K, K+1 ) ) )
669: H21 = MIN( CABS1( H( K+1, K ) ),
670: $ CABS1( H( K, K+1 ) ) )
671: H11 = MAX( CABS1( H( K+1, K+1 ) ),
672: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
673: H22 = MIN( CABS1( H( K+1, K+1 ) ),
674: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
675: SCL = H11 + H12
676: TST2 = H22*( H11 / SCL )
677: *
678: IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
679: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
680: END IF
681: END IF
682: 80 CONTINUE
683: *
684: * ==== Multiply H by reflections from the left ====
685: *
686: IF( ACCUM ) THEN
687: JBOT = MIN( NDCOL, KBOT )
688: ELSE IF( WANTT ) THEN
689: JBOT = N
690: ELSE
691: JBOT = KBOT
692: END IF
693: *
694: DO 100 M = MBOT, MTOP, -1
695: K = KRCOL + 2*( M-1 )
696: T1 = DCONJG( V( 1, M ) )
697: T2 = T1*V( 2, M )
698: T3 = T1*V( 3, M )
699: DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
700: REFSUM = H( K+1, J ) + DCONJG( V( 2, M ) )*H( K+2, J )
701: $ + DCONJG( V( 3, M ) )*H( K+3, J )
702: H( K+1, J ) = H( K+1, J ) - REFSUM*T1
703: H( K+2, J ) = H( K+2, J ) - REFSUM*T2
704: H( K+3, J ) = H( K+3, J ) - REFSUM*T3
705: 90 CONTINUE
706: 100 CONTINUE
707: *
708: * ==== Accumulate orthogonal transformations. ====
709: *
710: IF( ACCUM ) THEN
711: *
712: * ==== Accumulate U. (If needed, update Z later
713: * . with an efficient matrix-matrix
714: * . multiply.) ====
715: *
716: DO 120 M = MBOT, MTOP, -1
717: K = KRCOL + 2*( M-1 )
718: KMS = K - INCOL
719: I2 = MAX( 1, KTOP-INCOL )
720: I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
721: I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
722: T1 = V( 1, M )
723: T2 = T1*DCONJG( V( 2, M ) )
724: T3 = T1*DCONJG( V( 3, M ) )
725: DO 110 J = I2, I4
726: REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
727: $ + V( 3, M )*U( J, KMS+3 )
728: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
729: U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
730: U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
731: 110 CONTINUE
732: 120 CONTINUE
733: ELSE IF( WANTZ ) THEN
734: *
735: * ==== U is not accumulated, so update Z
736: * . now by multiplying by reflections
737: * . from the right. ====
738: *
739: DO 140 M = MBOT, MTOP, -1
740: K = KRCOL + 2*( M-1 )
741: T1 = V( 1, M )
742: T2 = T1*DCONJG( V( 2, M ) )
743: T3 = T1*DCONJG( V( 3, M ) )
744: DO 130 J = ILOZ, IHIZ
745: REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
746: $ + V( 3, M )*Z( J, K+3 )
747: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
748: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
749: Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
750: 130 CONTINUE
751: 140 CONTINUE
752: END IF
753: *
754: * ==== End of near-the-diagonal bulge chase. ====
755: *
756: 145 CONTINUE
757: *
758: * ==== Use U (if accumulated) to update far-from-diagonal
759: * . entries in H. If required, use U to update Z as
760: * . well. ====
761: *
762: IF( ACCUM ) THEN
763: IF( WANTT ) THEN
764: JTOP = 1
765: JBOT = N
766: ELSE
767: JTOP = KTOP
768: JBOT = KBOT
769: END IF
770: K1 = MAX( 1, KTOP-INCOL )
771: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
772: *
773: * ==== Horizontal Multiply ====
774: *
775: DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
776: JLEN = MIN( NH, JBOT-JCOL+1 )
777: CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
778: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
779: $ LDWH )
780: CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH,
781: $ H( INCOL+K1, JCOL ), LDH )
782: 150 CONTINUE
783: *
784: * ==== Vertical multiply ====
785: *
786: DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
787: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
788: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
789: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
790: $ LDU, ZERO, WV, LDWV )
791: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
792: $ H( JROW, INCOL+K1 ), LDH )
793: 160 CONTINUE
794: *
795: * ==== Z multiply (also vertical) ====
796: *
797: IF( WANTZ ) THEN
798: DO 170 JROW = ILOZ, IHIZ, NV
799: JLEN = MIN( NV, IHIZ-JROW+1 )
800: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
801: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
802: $ LDU, ZERO, WV, LDWV )
803: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
804: $ Z( JROW, INCOL+K1 ), LDZ )
805: 170 CONTINUE
806: END IF
807: END IF
808: 180 CONTINUE
809: *
810: * ==== End of ZLAQR5 ====
811: *
812: END
CVSweb interface <joel.bertrand@systella.fr>