1: *> \brief \b ZLAQR5
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZLAQR5 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22: * H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23: * WV, LDWV, NH, WH, LDWH )
24: *
25: * .. Scalar Arguments ..
26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28: * LOGICAL WANTT, WANTZ
29: * ..
30: * .. Array Arguments ..
31: * COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32: * $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33: * ..
34: *
35: *
36: *> \par Purpose:
37: * =============
38: *>
39: *> \verbatim
40: *>
41: *> ZLAQR5, called by ZLAQR0, performs a
42: *> single small-bulge multi-shift QR sweep.
43: *> \endverbatim
44: *
45: * Arguments:
46: * ==========
47: *
48: *> \param[in] WANTT
49: *> \verbatim
50: *> WANTT is logical scalar
51: *> WANTT = .true. if the triangular Schur factor
52: *> is being computed. WANTT is set to .false. otherwise.
53: *> \endverbatim
54: *>
55: *> \param[in] WANTZ
56: *> \verbatim
57: *> WANTZ is logical scalar
58: *> WANTZ = .true. if the unitary Schur factor is being
59: *> computed. WANTZ is set to .false. otherwise.
60: *> \endverbatim
61: *>
62: *> \param[in] KACC22
63: *> \verbatim
64: *> KACC22 is integer with value 0, 1, or 2.
65: *> Specifies the computation mode of far-from-diagonal
66: *> orthogonal updates.
67: *> = 0: ZLAQR5 does not accumulate reflections and does not
68: *> use matrix-matrix multiply to update far-from-diagonal
69: *> matrix entries.
70: *> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71: *> multiply to update the far-from-diagonal matrix entries.
72: *> = 2: ZLAQR5 accumulates reflections, uses matrix-matrix
73: *> multiply to update the far-from-diagonal matrix entries,
74: *> and takes advantage of 2-by-2 block structure during
75: *> matrix multiplies.
76: *> \endverbatim
77: *>
78: *> \param[in] N
79: *> \verbatim
80: *> N is integer scalar
81: *> N is the order of the Hessenberg matrix H upon which this
82: *> subroutine operates.
83: *> \endverbatim
84: *>
85: *> \param[in] KTOP
86: *> \verbatim
87: *> KTOP is integer scalar
88: *> \endverbatim
89: *>
90: *> \param[in] KBOT
91: *> \verbatim
92: *> KBOT is integer scalar
93: *> These are the first and last rows and columns of an
94: *> isolated diagonal block upon which the QR sweep is to be
95: *> applied. It is assumed without a check that
96: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97: *> and
98: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
99: *> \endverbatim
100: *>
101: *> \param[in] NSHFTS
102: *> \verbatim
103: *> NSHFTS is integer scalar
104: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
105: *> must be positive and even.
106: *> \endverbatim
107: *>
108: *> \param[in,out] S
109: *> \verbatim
110: *> S is COMPLEX*16 array of size (NSHFTS)
111: *> S contains the shifts of origin that define the multi-
112: *> shift QR sweep. On output S may be reordered.
113: *> \endverbatim
114: *>
115: *> \param[in,out] H
116: *> \verbatim
117: *> H is COMPLEX*16 array of size (LDH,N)
118: *> On input H contains a Hessenberg matrix. On output a
119: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
120: *> to the isolated diagonal block in rows and columns KTOP
121: *> through KBOT.
122: *> \endverbatim
123: *>
124: *> \param[in] LDH
125: *> \verbatim
126: *> LDH is integer scalar
127: *> LDH is the leading dimension of H just as declared in the
128: *> calling procedure. LDH.GE.MAX(1,N).
129: *> \endverbatim
130: *>
131: *> \param[in] ILOZ
132: *> \verbatim
133: *> ILOZ is INTEGER
134: *> \endverbatim
135: *>
136: *> \param[in] IHIZ
137: *> \verbatim
138: *> IHIZ is INTEGER
139: *> Specify the rows of Z to which transformations must be
140: *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
141: *> \endverbatim
142: *>
143: *> \param[in,out] Z
144: *> \verbatim
145: *> Z is COMPLEX*16 array of size (LDZ,IHI)
146: *> If WANTZ = .TRUE., then the QR Sweep unitary
147: *> similarity transformation is accumulated into
148: *> Z(ILOZ:IHIZ,ILO:IHI) from the right.
149: *> If WANTZ = .FALSE., then Z is unreferenced.
150: *> \endverbatim
151: *>
152: *> \param[in] LDZ
153: *> \verbatim
154: *> LDZ is integer scalar
155: *> LDA is the leading dimension of Z just as declared in
156: *> the calling procedure. LDZ.GE.N.
157: *> \endverbatim
158: *>
159: *> \param[out] V
160: *> \verbatim
161: *> V is COMPLEX*16 array of size (LDV,NSHFTS/2)
162: *> \endverbatim
163: *>
164: *> \param[in] LDV
165: *> \verbatim
166: *> LDV is integer scalar
167: *> LDV is the leading dimension of V as declared in the
168: *> calling procedure. LDV.GE.3.
169: *> \endverbatim
170: *>
171: *> \param[out] U
172: *> \verbatim
173: *> U is COMPLEX*16 array of size
174: *> (LDU,3*NSHFTS-3)
175: *> \endverbatim
176: *>
177: *> \param[in] LDU
178: *> \verbatim
179: *> LDU is integer scalar
180: *> LDU is the leading dimension of U just as declared in the
181: *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
182: *> \endverbatim
183: *>
184: *> \param[in] NH
185: *> \verbatim
186: *> NH is integer scalar
187: *> NH is the number of columns in array WH available for
188: *> workspace. NH.GE.1.
189: *> \endverbatim
190: *>
191: *> \param[out] WH
192: *> \verbatim
193: *> WH is COMPLEX*16 array of size (LDWH,NH)
194: *> \endverbatim
195: *>
196: *> \param[in] LDWH
197: *> \verbatim
198: *> LDWH is integer scalar
199: *> Leading dimension of WH just as declared in the
200: *> calling procedure. LDWH.GE.3*NSHFTS-3.
201: *> \endverbatim
202: *>
203: *> \param[in] NV
204: *> \verbatim
205: *> NV is integer scalar
206: *> NV is the number of rows in WV agailable for workspace.
207: *> NV.GE.1.
208: *> \endverbatim
209: *>
210: *> \param[out] WV
211: *> \verbatim
212: *> WV is COMPLEX*16 array of size
213: *> (LDWV,3*NSHFTS-3)
214: *> \endverbatim
215: *>
216: *> \param[in] LDWV
217: *> \verbatim
218: *> LDWV is integer scalar
219: *> LDWV is the leading dimension of WV as declared in the
220: *> in the calling subroutine. LDWV.GE.NV.
221: *> \endverbatim
222: *
223: * Authors:
224: * ========
225: *
226: *> \author Univ. of Tennessee
227: *> \author Univ. of California Berkeley
228: *> \author Univ. of Colorado Denver
229: *> \author NAG Ltd.
230: *
231: *> \date November 2011
232: *
233: *> \ingroup complex16OTHERauxiliary
234: *
235: *> \par Contributors:
236: * ==================
237: *>
238: *> Karen Braman and Ralph Byers, Department of Mathematics,
239: *> University of Kansas, USA
240: *
241: *> \par References:
242: * ================
243: *>
244: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247: *> 929--947, 2002.
248: *>
249: * =====================================================================
250: SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
251: $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
252: $ WV, LDWV, NH, WH, LDWH )
253: *
254: * -- LAPACK auxiliary routine (version 3.4.0) --
255: * -- LAPACK is a software package provided by Univ. of Tennessee, --
256: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
257: * November 2011
258: *
259: * .. Scalar Arguments ..
260: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
261: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
262: LOGICAL WANTT, WANTZ
263: * ..
264: * .. Array Arguments ..
265: COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
266: $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
267: * ..
268: *
269: * ================================================================
270: * .. Parameters ..
271: COMPLEX*16 ZERO, ONE
272: PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
273: $ ONE = ( 1.0d0, 0.0d0 ) )
274: DOUBLE PRECISION RZERO, RONE
275: PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 )
276: * ..
277: * .. Local Scalars ..
278: COMPLEX*16 ALPHA, BETA, CDUM, REFSUM
279: DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
280: $ SMLNUM, TST1, TST2, ULP
281: INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
282: $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
283: $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
284: $ NS, NU
285: LOGICAL ACCUM, BLK22, BMP22
286: * ..
287: * .. External Functions ..
288: DOUBLE PRECISION DLAMCH
289: EXTERNAL DLAMCH
290: * ..
291: * .. Intrinsic Functions ..
292: *
293: INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD
294: * ..
295: * .. Local Arrays ..
296: COMPLEX*16 VT( 3 )
297: * ..
298: * .. External Subroutines ..
299: EXTERNAL DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET,
300: $ ZTRMM
301: * ..
302: * .. Statement Functions ..
303: DOUBLE PRECISION CABS1
304: * ..
305: * .. Statement Function definitions ..
306: CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
307: * ..
308: * .. Executable Statements ..
309: *
310: * ==== If there are no shifts, then there is nothing to do. ====
311: *
312: IF( NSHFTS.LT.2 )
313: $ RETURN
314: *
315: * ==== If the active block is empty or 1-by-1, then there
316: * . is nothing to do. ====
317: *
318: IF( KTOP.GE.KBOT )
319: $ RETURN
320: *
321: * ==== NSHFTS is supposed to be even, but if it is odd,
322: * . then simply reduce it by one. ====
323: *
324: NS = NSHFTS - MOD( NSHFTS, 2 )
325: *
326: * ==== Machine constants for deflation ====
327: *
328: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
329: SAFMAX = RONE / SAFMIN
330: CALL DLABAD( SAFMIN, SAFMAX )
331: ULP = DLAMCH( 'PRECISION' )
332: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
333: *
334: * ==== Use accumulated reflections to update far-from-diagonal
335: * . entries ? ====
336: *
337: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
338: *
339: * ==== If so, exploit the 2-by-2 block structure? ====
340: *
341: BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
342: *
343: * ==== clear trash ====
344: *
345: IF( KTOP+2.LE.KBOT )
346: $ H( KTOP+2, KTOP ) = ZERO
347: *
348: * ==== NBMPS = number of 2-shift bulges in the chain ====
349: *
350: NBMPS = NS / 2
351: *
352: * ==== KDU = width of slab ====
353: *
354: KDU = 6*NBMPS - 3
355: *
356: * ==== Create and chase chains of NBMPS bulges ====
357: *
358: DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
359: NDCOL = INCOL + KDU
360: IF( ACCUM )
361: $ CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
362: *
363: * ==== Near-the-diagonal bulge chase. The following loop
364: * . performs the near-the-diagonal part of a small bulge
365: * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
366: * . chunk extends from column INCOL to column NDCOL
367: * . (including both column INCOL and column NDCOL). The
368: * . following loop chases a 3*NBMPS column long chain of
369: * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
370: * . may be less than KTOP and and NDCOL may be greater than
371: * . KBOT indicating phantom columns from which to chase
372: * . bulges before they are actually introduced or to which
373: * . to chase bulges beyond column KBOT.) ====
374: *
375: DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
376: *
377: * ==== Bulges number MTOP to MBOT are active double implicit
378: * . shift bulges. There may or may not also be small
379: * . 2-by-2 bulge, if there is room. The inactive bulges
380: * . (if any) must wait until the active bulges have moved
381: * . down the diagonal to make room. The phantom matrix
382: * . paradigm described above helps keep track. ====
383: *
384: MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
385: MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
386: M22 = MBOT + 1
387: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
388: $ ( KBOT-2 )
389: *
390: * ==== Generate reflections to chase the chain right
391: * . one column. (The minimum value of K is KTOP-1.) ====
392: *
393: DO 10 M = MTOP, MBOT
394: K = KRCOL + 3*( M-1 )
395: IF( K.EQ.KTOP-1 ) THEN
396: CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
397: $ S( 2*M ), V( 1, M ) )
398: ALPHA = V( 1, M )
399: CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
400: ELSE
401: BETA = H( K+1, K )
402: V( 2, M ) = H( K+2, K )
403: V( 3, M ) = H( K+3, K )
404: CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
405: *
406: * ==== A Bulge may collapse because of vigilant
407: * . deflation or destructive underflow. In the
408: * . underflow case, try the two-small-subdiagonals
409: * . trick to try to reinflate the bulge. ====
410: *
411: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
412: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
413: *
414: * ==== Typical case: not collapsed (yet). ====
415: *
416: H( K+1, K ) = BETA
417: H( K+2, K ) = ZERO
418: H( K+3, K ) = ZERO
419: ELSE
420: *
421: * ==== Atypical case: collapsed. Attempt to
422: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
423: * . If the fill resulting from the new
424: * . reflector is too large, then abandon it.
425: * . Otherwise, use the new one. ====
426: *
427: CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
428: $ S( 2*M ), VT )
429: ALPHA = VT( 1 )
430: CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
431: REFSUM = DCONJG( VT( 1 ) )*
432: $ ( H( K+1, K )+DCONJG( VT( 2 ) )*
433: $ H( K+2, K ) )
434: *
435: IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
436: $ CABS1( REFSUM*VT( 3 ) ).GT.ULP*
437: $ ( CABS1( H( K, K ) )+CABS1( H( K+1,
438: $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
439: *
440: * ==== Starting a new bulge here would
441: * . create non-negligible fill. Use
442: * . the old one with trepidation. ====
443: *
444: H( K+1, K ) = BETA
445: H( K+2, K ) = ZERO
446: H( K+3, K ) = ZERO
447: ELSE
448: *
449: * ==== Stating a new bulge here would
450: * . create only negligible fill.
451: * . Replace the old reflector with
452: * . the new one. ====
453: *
454: H( K+1, K ) = H( K+1, K ) - REFSUM
455: H( K+2, K ) = ZERO
456: H( K+3, K ) = ZERO
457: V( 1, M ) = VT( 1 )
458: V( 2, M ) = VT( 2 )
459: V( 3, M ) = VT( 3 )
460: END IF
461: END IF
462: END IF
463: 10 CONTINUE
464: *
465: * ==== Generate a 2-by-2 reflection, if needed. ====
466: *
467: K = KRCOL + 3*( M22-1 )
468: IF( BMP22 ) THEN
469: IF( K.EQ.KTOP-1 ) THEN
470: CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
471: $ S( 2*M22 ), V( 1, M22 ) )
472: BETA = V( 1, M22 )
473: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
474: ELSE
475: BETA = H( K+1, K )
476: V( 2, M22 ) = H( K+2, K )
477: CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
478: H( K+1, K ) = BETA
479: H( K+2, K ) = ZERO
480: END IF
481: END IF
482: *
483: * ==== Multiply H by reflections from the left ====
484: *
485: IF( ACCUM ) THEN
486: JBOT = MIN( NDCOL, KBOT )
487: ELSE IF( WANTT ) THEN
488: JBOT = N
489: ELSE
490: JBOT = KBOT
491: END IF
492: DO 30 J = MAX( KTOP, KRCOL ), JBOT
493: MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
494: DO 20 M = MTOP, MEND
495: K = KRCOL + 3*( M-1 )
496: REFSUM = DCONJG( V( 1, M ) )*
497: $ ( H( K+1, J )+DCONJG( V( 2, M ) )*
498: $ H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) )
499: H( K+1, J ) = H( K+1, J ) - REFSUM
500: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
501: H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
502: 20 CONTINUE
503: 30 CONTINUE
504: IF( BMP22 ) THEN
505: K = KRCOL + 3*( M22-1 )
506: DO 40 J = MAX( K+1, KTOP ), JBOT
507: REFSUM = DCONJG( V( 1, M22 ) )*
508: $ ( H( K+1, J )+DCONJG( V( 2, M22 ) )*
509: $ H( K+2, J ) )
510: H( K+1, J ) = H( K+1, J ) - REFSUM
511: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
512: 40 CONTINUE
513: END IF
514: *
515: * ==== Multiply H by reflections from the right.
516: * . Delay filling in the last row until the
517: * . vigilant deflation check is complete. ====
518: *
519: IF( ACCUM ) THEN
520: JTOP = MAX( KTOP, INCOL )
521: ELSE IF( WANTT ) THEN
522: JTOP = 1
523: ELSE
524: JTOP = KTOP
525: END IF
526: DO 80 M = MTOP, MBOT
527: IF( V( 1, M ).NE.ZERO ) THEN
528: K = KRCOL + 3*( M-1 )
529: DO 50 J = JTOP, MIN( KBOT, K+3 )
530: REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
531: $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
532: H( J, K+1 ) = H( J, K+1 ) - REFSUM
533: H( J, K+2 ) = H( J, K+2 ) -
534: $ REFSUM*DCONJG( V( 2, M ) )
535: H( J, K+3 ) = H( J, K+3 ) -
536: $ REFSUM*DCONJG( V( 3, M ) )
537: 50 CONTINUE
538: *
539: IF( ACCUM ) THEN
540: *
541: * ==== Accumulate U. (If necessary, update Z later
542: * . with with an efficient matrix-matrix
543: * . multiply.) ====
544: *
545: KMS = K - INCOL
546: DO 60 J = MAX( 1, KTOP-INCOL ), KDU
547: REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
548: $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
549: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
550: U( J, KMS+2 ) = U( J, KMS+2 ) -
551: $ REFSUM*DCONJG( V( 2, M ) )
552: U( J, KMS+3 ) = U( J, KMS+3 ) -
553: $ REFSUM*DCONJG( V( 3, M ) )
554: 60 CONTINUE
555: ELSE IF( WANTZ ) THEN
556: *
557: * ==== U is not accumulated, so update Z
558: * . now by multiplying by reflections
559: * . from the right. ====
560: *
561: DO 70 J = ILOZ, IHIZ
562: REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
563: $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
564: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
565: Z( J, K+2 ) = Z( J, K+2 ) -
566: $ REFSUM*DCONJG( V( 2, M ) )
567: Z( J, K+3 ) = Z( J, K+3 ) -
568: $ REFSUM*DCONJG( V( 3, M ) )
569: 70 CONTINUE
570: END IF
571: END IF
572: 80 CONTINUE
573: *
574: * ==== Special case: 2-by-2 reflection (if needed) ====
575: *
576: K = KRCOL + 3*( M22-1 )
577: IF( BMP22 ) THEN
578: IF ( V( 1, M22 ).NE.ZERO ) THEN
579: DO 90 J = JTOP, MIN( KBOT, K+3 )
580: REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
581: $ H( J, K+2 ) )
582: H( J, K+1 ) = H( J, K+1 ) - REFSUM
583: H( J, K+2 ) = H( J, K+2 ) -
584: $ REFSUM*DCONJG( V( 2, M22 ) )
585: 90 CONTINUE
586: *
587: IF( ACCUM ) THEN
588: KMS = K - INCOL
589: DO 100 J = MAX( 1, KTOP-INCOL ), KDU
590: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
591: $ V( 2, M22 )*U( J, KMS+2 ) )
592: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
593: U( J, KMS+2 ) = U( J, KMS+2 ) -
594: $ REFSUM*DCONJG( V( 2, M22 ) )
595: 100 CONTINUE
596: ELSE IF( WANTZ ) THEN
597: DO 110 J = ILOZ, IHIZ
598: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
599: $ Z( J, K+2 ) )
600: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
601: Z( J, K+2 ) = Z( J, K+2 ) -
602: $ REFSUM*DCONJG( V( 2, M22 ) )
603: 110 CONTINUE
604: END IF
605: END IF
606: END IF
607: *
608: * ==== Vigilant deflation check ====
609: *
610: MSTART = MTOP
611: IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
612: $ MSTART = MSTART + 1
613: MEND = MBOT
614: IF( BMP22 )
615: $ MEND = MEND + 1
616: IF( KRCOL.EQ.KBOT-2 )
617: $ MEND = MEND + 1
618: DO 120 M = MSTART, MEND
619: K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
620: *
621: * ==== The following convergence test requires that
622: * . the tradition small-compared-to-nearby-diagonals
623: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
624: * . criteria both be satisfied. The latter improves
625: * . accuracy in some examples. Falling back on an
626: * . alternate convergence criterion when TST1 or TST2
627: * . is zero (as done here) is traditional but probably
628: * . unnecessary. ====
629: *
630: IF( H( K+1, K ).NE.ZERO ) THEN
631: TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
632: IF( TST1.EQ.RZERO ) THEN
633: IF( K.GE.KTOP+1 )
634: $ TST1 = TST1 + CABS1( H( K, K-1 ) )
635: IF( K.GE.KTOP+2 )
636: $ TST1 = TST1 + CABS1( H( K, K-2 ) )
637: IF( K.GE.KTOP+3 )
638: $ TST1 = TST1 + CABS1( H( K, K-3 ) )
639: IF( K.LE.KBOT-2 )
640: $ TST1 = TST1 + CABS1( H( K+2, K+1 ) )
641: IF( K.LE.KBOT-3 )
642: $ TST1 = TST1 + CABS1( H( K+3, K+1 ) )
643: IF( K.LE.KBOT-4 )
644: $ TST1 = TST1 + CABS1( H( K+4, K+1 ) )
645: END IF
646: IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
647: $ THEN
648: H12 = MAX( CABS1( H( K+1, K ) ),
649: $ CABS1( H( K, K+1 ) ) )
650: H21 = MIN( CABS1( H( K+1, K ) ),
651: $ CABS1( H( K, K+1 ) ) )
652: H11 = MAX( CABS1( H( K+1, K+1 ) ),
653: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
654: H22 = MIN( CABS1( H( K+1, K+1 ) ),
655: $ CABS1( H( K, K )-H( K+1, K+1 ) ) )
656: SCL = H11 + H12
657: TST2 = H22*( H11 / SCL )
658: *
659: IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
660: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
661: END IF
662: END IF
663: 120 CONTINUE
664: *
665: * ==== Fill in the last row of each bulge. ====
666: *
667: MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
668: DO 130 M = MTOP, MEND
669: K = KRCOL + 3*( M-1 )
670: REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
671: H( K+4, K+1 ) = -REFSUM
672: H( K+4, K+2 ) = -REFSUM*DCONJG( V( 2, M ) )
673: H( K+4, K+3 ) = H( K+4, K+3 ) -
674: $ REFSUM*DCONJG( V( 3, M ) )
675: 130 CONTINUE
676: *
677: * ==== End of near-the-diagonal bulge chase. ====
678: *
679: 140 CONTINUE
680: *
681: * ==== Use U (if accumulated) to update far-from-diagonal
682: * . entries in H. If required, use U to update Z as
683: * . well. ====
684: *
685: IF( ACCUM ) THEN
686: IF( WANTT ) THEN
687: JTOP = 1
688: JBOT = N
689: ELSE
690: JTOP = KTOP
691: JBOT = KBOT
692: END IF
693: IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
694: $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
695: *
696: * ==== Updates not exploiting the 2-by-2 block
697: * . structure of U. K1 and NU keep track of
698: * . the location and size of U in the special
699: * . cases of introducing bulges and chasing
700: * . bulges off the bottom. In these special
701: * . cases and in case the number of shifts
702: * . is NS = 2, there is no 2-by-2 block
703: * . structure to exploit. ====
704: *
705: K1 = MAX( 1, KTOP-INCOL )
706: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
707: *
708: * ==== Horizontal Multiply ====
709: *
710: DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
711: JLEN = MIN( NH, JBOT-JCOL+1 )
712: CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
713: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
714: $ LDWH )
715: CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH,
716: $ H( INCOL+K1, JCOL ), LDH )
717: 150 CONTINUE
718: *
719: * ==== Vertical multiply ====
720: *
721: DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
722: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
723: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
724: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
725: $ LDU, ZERO, WV, LDWV )
726: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
727: $ H( JROW, INCOL+K1 ), LDH )
728: 160 CONTINUE
729: *
730: * ==== Z multiply (also vertical) ====
731: *
732: IF( WANTZ ) THEN
733: DO 170 JROW = ILOZ, IHIZ, NV
734: JLEN = MIN( NV, IHIZ-JROW+1 )
735: CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
736: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
737: $ LDU, ZERO, WV, LDWV )
738: CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
739: $ Z( JROW, INCOL+K1 ), LDZ )
740: 170 CONTINUE
741: END IF
742: ELSE
743: *
744: * ==== Updates exploiting U's 2-by-2 block structure.
745: * . (I2, I4, J2, J4 are the last rows and columns
746: * . of the blocks.) ====
747: *
748: I2 = ( KDU+1 ) / 2
749: I4 = KDU
750: J2 = I4 - I2
751: J4 = KDU
752: *
753: * ==== KZS and KNZ deal with the band of zeros
754: * . along the diagonal of one of the triangular
755: * . blocks. ====
756: *
757: KZS = ( J4-J2 ) - ( NS+1 )
758: KNZ = NS + 1
759: *
760: * ==== Horizontal multiply ====
761: *
762: DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
763: JLEN = MIN( NH, JBOT-JCOL+1 )
764: *
765: * ==== Copy bottom of H to top+KZS of scratch ====
766: * (The first KZS rows get multiplied by zero.) ====
767: *
768: CALL ZLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
769: $ LDH, WH( KZS+1, 1 ), LDWH )
770: *
771: * ==== Multiply by U21**H ====
772: *
773: CALL ZLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
774: CALL ZTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
775: $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
776: $ LDWH )
777: *
778: * ==== Multiply top of H by U11**H ====
779: *
780: CALL ZGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
781: $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
782: *
783: * ==== Copy top of H to bottom of WH ====
784: *
785: CALL ZLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
786: $ WH( I2+1, 1 ), LDWH )
787: *
788: * ==== Multiply by U21**H ====
789: *
790: CALL ZTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
791: $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
792: *
793: * ==== Multiply by U22 ====
794: *
795: CALL ZGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
796: $ U( J2+1, I2+1 ), LDU,
797: $ H( INCOL+1+J2, JCOL ), LDH, ONE,
798: $ WH( I2+1, 1 ), LDWH )
799: *
800: * ==== Copy it back ====
801: *
802: CALL ZLACPY( 'ALL', KDU, JLEN, WH, LDWH,
803: $ H( INCOL+1, JCOL ), LDH )
804: 180 CONTINUE
805: *
806: * ==== Vertical multiply ====
807: *
808: DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
809: JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
810: *
811: * ==== Copy right of H to scratch (the first KZS
812: * . columns get multiplied by zero) ====
813: *
814: CALL ZLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
815: $ LDH, WV( 1, 1+KZS ), LDWV )
816: *
817: * ==== Multiply by U21 ====
818: *
819: CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
820: CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
821: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
822: $ LDWV )
823: *
824: * ==== Multiply by U11 ====
825: *
826: CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
827: $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
828: $ LDWV )
829: *
830: * ==== Copy left of H to right of scratch ====
831: *
832: CALL ZLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
833: $ WV( 1, 1+I2 ), LDWV )
834: *
835: * ==== Multiply by U21 ====
836: *
837: CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
838: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
839: *
840: * ==== Multiply by U22 ====
841: *
842: CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
843: $ H( JROW, INCOL+1+J2 ), LDH,
844: $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
845: $ LDWV )
846: *
847: * ==== Copy it back ====
848: *
849: CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
850: $ H( JROW, INCOL+1 ), LDH )
851: 190 CONTINUE
852: *
853: * ==== Multiply Z (also vertical) ====
854: *
855: IF( WANTZ ) THEN
856: DO 200 JROW = ILOZ, IHIZ, NV
857: JLEN = MIN( NV, IHIZ-JROW+1 )
858: *
859: * ==== Copy right of Z to left of scratch (first
860: * . KZS columns get multiplied by zero) ====
861: *
862: CALL ZLACPY( 'ALL', JLEN, KNZ,
863: $ Z( JROW, INCOL+1+J2 ), LDZ,
864: $ WV( 1, 1+KZS ), LDWV )
865: *
866: * ==== Multiply by U12 ====
867: *
868: CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
869: $ LDWV )
870: CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
871: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
872: $ LDWV )
873: *
874: * ==== Multiply by U11 ====
875: *
876: CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE,
877: $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
878: $ WV, LDWV )
879: *
880: * ==== Copy left of Z to right of scratch ====
881: *
882: CALL ZLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
883: $ LDZ, WV( 1, 1+I2 ), LDWV )
884: *
885: * ==== Multiply by U21 ====
886: *
887: CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
888: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
889: $ LDWV )
890: *
891: * ==== Multiply by U22 ====
892: *
893: CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
894: $ Z( JROW, INCOL+1+J2 ), LDZ,
895: $ U( J2+1, I2+1 ), LDU, ONE,
896: $ WV( 1, 1+I2 ), LDWV )
897: *
898: * ==== Copy the result back to Z ====
899: *
900: CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV,
901: $ Z( JROW, INCOL+1 ), LDZ )
902: 200 CONTINUE
903: END IF
904: END IF
905: END IF
906: 210 CONTINUE
907: *
908: * ==== End of ZLAQR5 ====
909: *
910: END
CVSweb interface <joel.bertrand@systella.fr>