1: *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DLAQR5 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22: * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23: * LDU, NV, WV, LDWV, NH, WH, LDWH )
24: *
25: * .. Scalar Arguments ..
26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28: * LOGICAL WANTT, WANTZ
29: * ..
30: * .. Array Arguments ..
31: * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32: * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33: * $ Z( LDZ, * )
34: * ..
35: *
36: *
37: *> \par Purpose:
38: * =============
39: *>
40: *> \verbatim
41: *>
42: *> DLAQR5, called by DLAQR0, performs a
43: *> single small-bulge multi-shift QR sweep.
44: *> \endverbatim
45: *
46: * Arguments:
47: * ==========
48: *
49: *> \param[in] WANTT
50: *> \verbatim
51: *> WANTT is LOGICAL
52: *> WANTT = .true. if the quasi-triangular Schur factor
53: *> is being computed. WANTT is set to .false. otherwise.
54: *> \endverbatim
55: *>
56: *> \param[in] WANTZ
57: *> \verbatim
58: *> WANTZ is LOGICAL
59: *> WANTZ = .true. if the orthogonal Schur factor is being
60: *> computed. WANTZ is set to .false. otherwise.
61: *> \endverbatim
62: *>
63: *> \param[in] KACC22
64: *> \verbatim
65: *> KACC22 is INTEGER with value 0, 1, or 2.
66: *> Specifies the computation mode of far-from-diagonal
67: *> orthogonal updates.
68: *> = 0: DLAQR5 does not accumulate reflections and does not
69: *> use matrix-matrix multiply to update far-from-diagonal
70: *> matrix entries.
71: *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72: *> multiply to update the far-from-diagonal matrix entries.
73: *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix
74: *> multiply to update the far-from-diagonal matrix entries,
75: *> and takes advantage of 2-by-2 block structure during
76: *> matrix multiplies.
77: *> \endverbatim
78: *>
79: *> \param[in] N
80: *> \verbatim
81: *> N is INTEGER
82: *> N is the order of the Hessenberg matrix H upon which this
83: *> subroutine operates.
84: *> \endverbatim
85: *>
86: *> \param[in] KTOP
87: *> \verbatim
88: *> KTOP is INTEGER
89: *> \endverbatim
90: *>
91: *> \param[in] KBOT
92: *> \verbatim
93: *> KBOT is INTEGER
94: *> These are the first and last rows and columns of an
95: *> isolated diagonal block upon which the QR sweep is to be
96: *> applied. It is assumed without a check that
97: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
98: *> and
99: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
100: *> \endverbatim
101: *>
102: *> \param[in] NSHFTS
103: *> \verbatim
104: *> NSHFTS is INTEGER
105: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
106: *> must be positive and even.
107: *> \endverbatim
108: *>
109: *> \param[in,out] SR
110: *> \verbatim
111: *> SR is DOUBLE PRECISION array, dimension (NSHFTS)
112: *> \endverbatim
113: *>
114: *> \param[in,out] SI
115: *> \verbatim
116: *> SI is DOUBLE PRECISION array, dimension (NSHFTS)
117: *> SR contains the real parts and SI contains the imaginary
118: *> parts of the NSHFTS shifts of origin that define the
119: *> multi-shift QR sweep. On output SR and SI may be
120: *> reordered.
121: *> \endverbatim
122: *>
123: *> \param[in,out] H
124: *> \verbatim
125: *> H is DOUBLE PRECISION array, dimension (LDH,N)
126: *> On input H contains a Hessenberg matrix. On output a
127: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128: *> to the isolated diagonal block in rows and columns KTOP
129: *> through KBOT.
130: *> \endverbatim
131: *>
132: *> \param[in] LDH
133: *> \verbatim
134: *> LDH is INTEGER
135: *> LDH is the leading dimension of H just as declared in the
136: *> calling procedure. LDH >= MAX(1,N).
137: *> \endverbatim
138: *>
139: *> \param[in] ILOZ
140: *> \verbatim
141: *> ILOZ is INTEGER
142: *> \endverbatim
143: *>
144: *> \param[in] IHIZ
145: *> \verbatim
146: *> IHIZ is INTEGER
147: *> Specify the rows of Z to which transformations must be
148: *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
149: *> \endverbatim
150: *>
151: *> \param[in,out] Z
152: *> \verbatim
153: *> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
154: *> If WANTZ = .TRUE., then the QR Sweep orthogonal
155: *> similarity transformation is accumulated into
156: *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
157: *> If WANTZ = .FALSE., then Z is unreferenced.
158: *> \endverbatim
159: *>
160: *> \param[in] LDZ
161: *> \verbatim
162: *> LDZ is INTEGER
163: *> LDA is the leading dimension of Z just as declared in
164: *> the calling procedure. LDZ >= N.
165: *> \endverbatim
166: *>
167: *> \param[out] V
168: *> \verbatim
169: *> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
170: *> \endverbatim
171: *>
172: *> \param[in] LDV
173: *> \verbatim
174: *> LDV is INTEGER
175: *> LDV is the leading dimension of V as declared in the
176: *> calling procedure. LDV >= 3.
177: *> \endverbatim
178: *>
179: *> \param[out] U
180: *> \verbatim
181: *> U is DOUBLE PRECISION array, dimension (LDU,3*NSHFTS-3)
182: *> \endverbatim
183: *>
184: *> \param[in] LDU
185: *> \verbatim
186: *> LDU is INTEGER
187: *> LDU is the leading dimension of U just as declared in the
188: *> in the calling subroutine. LDU >= 3*NSHFTS-3.
189: *> \endverbatim
190: *>
191: *> \param[in] NV
192: *> \verbatim
193: *> NV is INTEGER
194: *> NV is the number of rows in WV agailable for workspace.
195: *> NV >= 1.
196: *> \endverbatim
197: *>
198: *> \param[out] WV
199: *> \verbatim
200: *> WV is DOUBLE PRECISION array, dimension (LDWV,3*NSHFTS-3)
201: *> \endverbatim
202: *>
203: *> \param[in] LDWV
204: *> \verbatim
205: *> LDWV is INTEGER
206: *> LDWV is the leading dimension of WV as declared in the
207: *> in the calling subroutine. LDWV >= NV.
208: *> \endverbatim
209: *
210: *> \param[in] NH
211: *> \verbatim
212: *> NH is INTEGER
213: *> NH is the number of columns in array WH available for
214: *> workspace. NH >= 1.
215: *> \endverbatim
216: *>
217: *> \param[out] WH
218: *> \verbatim
219: *> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
220: *> \endverbatim
221: *>
222: *> \param[in] LDWH
223: *> \verbatim
224: *> LDWH is INTEGER
225: *> Leading dimension of WH just as declared in the
226: *> calling procedure. LDWH >= 3*NSHFTS-3.
227: *> \endverbatim
228: *>
229: * Authors:
230: * ========
231: *
232: *> \author Univ. of Tennessee
233: *> \author Univ. of California Berkeley
234: *> \author Univ. of Colorado Denver
235: *> \author NAG Ltd.
236: *
237: *> \date June 2016
238: *
239: *> \ingroup doubleOTHERauxiliary
240: *
241: *> \par Contributors:
242: * ==================
243: *>
244: *> Karen Braman and Ralph Byers, Department of Mathematics,
245: *> University of Kansas, USA
246: *
247: *> \par References:
248: * ================
249: *>
250: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
251: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
252: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
253: *> 929--947, 2002.
254: *>
255: * =====================================================================
256: SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
257: $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
258: $ LDU, NV, WV, LDWV, NH, WH, LDWH )
259: *
260: * -- LAPACK auxiliary routine (version 3.7.1) --
261: * -- LAPACK is a software package provided by Univ. of Tennessee, --
262: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
263: * June 2016
264: *
265: * .. Scalar Arguments ..
266: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
267: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
268: LOGICAL WANTT, WANTZ
269: * ..
270: * .. Array Arguments ..
271: DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
272: $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
273: $ Z( LDZ, * )
274: * ..
275: *
276: * ================================================================
277: * .. Parameters ..
278: DOUBLE PRECISION ZERO, ONE
279: PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
280: * ..
281: * .. Local Scalars ..
282: DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
283: $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
284: $ ULP
285: INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
286: $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
287: $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
288: $ NS, NU
289: LOGICAL ACCUM, BLK22, BMP22
290: * ..
291: * .. External Functions ..
292: DOUBLE PRECISION DLAMCH
293: EXTERNAL DLAMCH
294: * ..
295: * .. Intrinsic Functions ..
296: *
297: INTRINSIC ABS, DBLE, MAX, MIN, MOD
298: * ..
299: * .. Local Arrays ..
300: DOUBLE PRECISION VT( 3 )
301: * ..
302: * .. External Subroutines ..
303: EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
304: $ DTRMM
305: * ..
306: * .. Executable Statements ..
307: *
308: * ==== If there are no shifts, then there is nothing to do. ====
309: *
310: IF( NSHFTS.LT.2 )
311: $ RETURN
312: *
313: * ==== If the active block is empty or 1-by-1, then there
314: * . is nothing to do. ====
315: *
316: IF( KTOP.GE.KBOT )
317: $ RETURN
318: *
319: * ==== Shuffle shifts into pairs of real shifts and pairs
320: * . of complex conjugate shifts assuming complex
321: * . conjugate shifts are already adjacent to one
322: * . another. ====
323: *
324: DO 10 I = 1, NSHFTS - 2, 2
325: IF( SI( I ).NE.-SI( I+1 ) ) THEN
326: *
327: SWAP = SR( I )
328: SR( I ) = SR( I+1 )
329: SR( I+1 ) = SR( I+2 )
330: SR( I+2 ) = SWAP
331: *
332: SWAP = SI( I )
333: SI( I ) = SI( I+1 )
334: SI( I+1 ) = SI( I+2 )
335: SI( I+2 ) = SWAP
336: END IF
337: 10 CONTINUE
338: *
339: * ==== NSHFTS is supposed to be even, but if it is odd,
340: * . then simply reduce it by one. The shuffle above
341: * . ensures that the dropped shift is real and that
342: * . the remaining shifts are paired. ====
343: *
344: NS = NSHFTS - MOD( NSHFTS, 2 )
345: *
346: * ==== Machine constants for deflation ====
347: *
348: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
349: SAFMAX = ONE / SAFMIN
350: CALL DLABAD( SAFMIN, SAFMAX )
351: ULP = DLAMCH( 'PRECISION' )
352: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
353: *
354: * ==== Use accumulated reflections to update far-from-diagonal
355: * . entries ? ====
356: *
357: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
358: *
359: * ==== If so, exploit the 2-by-2 block structure? ====
360: *
361: BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
362: *
363: * ==== clear trash ====
364: *
365: IF( KTOP+2.LE.KBOT )
366: $ H( KTOP+2, KTOP ) = ZERO
367: *
368: * ==== NBMPS = number of 2-shift bulges in the chain ====
369: *
370: NBMPS = NS / 2
371: *
372: * ==== KDU = width of slab ====
373: *
374: KDU = 6*NBMPS - 3
375: *
376: * ==== Create and chase chains of NBMPS bulges ====
377: *
378: DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
379: NDCOL = INCOL + KDU
380: IF( ACCUM )
381: $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
382: *
383: * ==== Near-the-diagonal bulge chase. The following loop
384: * . performs the near-the-diagonal part of a small bulge
385: * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
386: * . chunk extends from column INCOL to column NDCOL
387: * . (including both column INCOL and column NDCOL). The
388: * . following loop chases a 3*NBMPS column long chain of
389: * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
390: * . may be less than KTOP and and NDCOL may be greater than
391: * . KBOT indicating phantom columns from which to chase
392: * . bulges before they are actually introduced or to which
393: * . to chase bulges beyond column KBOT.) ====
394: *
395: DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
396: *
397: * ==== Bulges number MTOP to MBOT are active double implicit
398: * . shift bulges. There may or may not also be small
399: * . 2-by-2 bulge, if there is room. The inactive bulges
400: * . (if any) must wait until the active bulges have moved
401: * . down the diagonal to make room. The phantom matrix
402: * . paradigm described above helps keep track. ====
403: *
404: MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
405: MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
406: M22 = MBOT + 1
407: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
408: $ ( KBOT-2 )
409: *
410: * ==== Generate reflections to chase the chain right
411: * . one column. (The minimum value of K is KTOP-1.) ====
412: *
413: DO 20 M = MTOP, MBOT
414: K = KRCOL + 3*( M-1 )
415: IF( K.EQ.KTOP-1 ) THEN
416: CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
417: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
418: $ V( 1, M ) )
419: ALPHA = V( 1, M )
420: CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
421: ELSE
422: BETA = H( K+1, K )
423: V( 2, M ) = H( K+2, K )
424: V( 3, M ) = H( K+3, K )
425: CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
426: *
427: * ==== A Bulge may collapse because of vigilant
428: * . deflation or destructive underflow. In the
429: * . underflow case, try the two-small-subdiagonals
430: * . trick to try to reinflate the bulge. ====
431: *
432: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
433: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
434: *
435: * ==== Typical case: not collapsed (yet). ====
436: *
437: H( K+1, K ) = BETA
438: H( K+2, K ) = ZERO
439: H( K+3, K ) = ZERO
440: ELSE
441: *
442: * ==== Atypical case: collapsed. Attempt to
443: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
444: * . If the fill resulting from the new
445: * . reflector is too large, then abandon it.
446: * . Otherwise, use the new one. ====
447: *
448: CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
449: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
450: $ VT )
451: ALPHA = VT( 1 )
452: CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
453: REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
454: $ H( K+2, K ) )
455: *
456: IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
457: $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
458: $ ( ABS( H( K, K ) )+ABS( H( K+1,
459: $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
460: *
461: * ==== Starting a new bulge here would
462: * . create non-negligible fill. Use
463: * . the old one with trepidation. ====
464: *
465: H( K+1, K ) = BETA
466: H( K+2, K ) = ZERO
467: H( K+3, K ) = ZERO
468: ELSE
469: *
470: * ==== Stating a new bulge here would
471: * . create only negligible fill.
472: * . Replace the old reflector with
473: * . the new one. ====
474: *
475: H( K+1, K ) = H( K+1, K ) - REFSUM
476: H( K+2, K ) = ZERO
477: H( K+3, K ) = ZERO
478: V( 1, M ) = VT( 1 )
479: V( 2, M ) = VT( 2 )
480: V( 3, M ) = VT( 3 )
481: END IF
482: END IF
483: END IF
484: 20 CONTINUE
485: *
486: * ==== Generate a 2-by-2 reflection, if needed. ====
487: *
488: K = KRCOL + 3*( M22-1 )
489: IF( BMP22 ) THEN
490: IF( K.EQ.KTOP-1 ) THEN
491: CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
492: $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
493: $ V( 1, M22 ) )
494: BETA = V( 1, M22 )
495: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
496: ELSE
497: BETA = H( K+1, K )
498: V( 2, M22 ) = H( K+2, K )
499: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
500: H( K+1, K ) = BETA
501: H( K+2, K ) = ZERO
502: END IF
503: END IF
504: *
505: * ==== Multiply H by reflections from the left ====
506: *
507: IF( ACCUM ) THEN
508: JBOT = MIN( NDCOL, KBOT )
509: ELSE IF( WANTT ) THEN
510: JBOT = N
511: ELSE
512: JBOT = KBOT
513: END IF
514: DO 40 J = MAX( KTOP, KRCOL ), JBOT
515: MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
516: DO 30 M = MTOP, MEND
517: K = KRCOL + 3*( M-1 )
518: REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
519: $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
520: H( K+1, J ) = H( K+1, J ) - REFSUM
521: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
522: H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
523: 30 CONTINUE
524: 40 CONTINUE
525: IF( BMP22 ) THEN
526: K = KRCOL + 3*( M22-1 )
527: DO 50 J = MAX( K+1, KTOP ), JBOT
528: REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
529: $ H( K+2, J ) )
530: H( K+1, J ) = H( K+1, J ) - REFSUM
531: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
532: 50 CONTINUE
533: END IF
534: *
535: * ==== Multiply H by reflections from the right.
536: * . Delay filling in the last row until the
537: * . vigilant deflation check is complete. ====
538: *
539: IF( ACCUM ) THEN
540: JTOP = MAX( KTOP, INCOL )
541: ELSE IF( WANTT ) THEN
542: JTOP = 1
543: ELSE
544: JTOP = KTOP
545: END IF
546: DO 90 M = MTOP, MBOT
547: IF( V( 1, M ).NE.ZERO ) THEN
548: K = KRCOL + 3*( M-1 )
549: DO 60 J = JTOP, MIN( KBOT, K+3 )
550: REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
551: $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
552: H( J, K+1 ) = H( J, K+1 ) - REFSUM
553: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
554: H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
555: 60 CONTINUE
556: *
557: IF( ACCUM ) THEN
558: *
559: * ==== Accumulate U. (If necessary, update Z later
560: * . with with an efficient matrix-matrix
561: * . multiply.) ====
562: *
563: KMS = K - INCOL
564: DO 70 J = MAX( 1, KTOP-INCOL ), KDU
565: REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
566: $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
567: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
568: U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
569: U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
570: 70 CONTINUE
571: ELSE IF( WANTZ ) THEN
572: *
573: * ==== U is not accumulated, so update Z
574: * . now by multiplying by reflections
575: * . from the right. ====
576: *
577: DO 80 J = ILOZ, IHIZ
578: REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
579: $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
580: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
581: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
582: Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
583: 80 CONTINUE
584: END IF
585: END IF
586: 90 CONTINUE
587: *
588: * ==== Special case: 2-by-2 reflection (if needed) ====
589: *
590: K = KRCOL + 3*( M22-1 )
591: IF( BMP22 ) THEN
592: IF ( V( 1, M22 ).NE.ZERO ) THEN
593: DO 100 J = JTOP, MIN( KBOT, K+3 )
594: REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
595: $ H( J, K+2 ) )
596: H( J, K+1 ) = H( J, K+1 ) - REFSUM
597: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
598: 100 CONTINUE
599: *
600: IF( ACCUM ) THEN
601: KMS = K - INCOL
602: DO 110 J = MAX( 1, KTOP-INCOL ), KDU
603: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
604: $ V( 2, M22 )*U( J, KMS+2 ) )
605: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
606: U( J, KMS+2 ) = U( J, KMS+2 ) -
607: $ REFSUM*V( 2, M22 )
608: 110 CONTINUE
609: ELSE IF( WANTZ ) THEN
610: DO 120 J = ILOZ, IHIZ
611: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
612: $ Z( J, K+2 ) )
613: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
614: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
615: 120 CONTINUE
616: END IF
617: END IF
618: END IF
619: *
620: * ==== Vigilant deflation check ====
621: *
622: MSTART = MTOP
623: IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
624: $ MSTART = MSTART + 1
625: MEND = MBOT
626: IF( BMP22 )
627: $ MEND = MEND + 1
628: IF( KRCOL.EQ.KBOT-2 )
629: $ MEND = MEND + 1
630: DO 130 M = MSTART, MEND
631: K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
632: *
633: * ==== The following convergence test requires that
634: * . the tradition small-compared-to-nearby-diagonals
635: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
636: * . criteria both be satisfied. The latter improves
637: * . accuracy in some examples. Falling back on an
638: * . alternate convergence criterion when TST1 or TST2
639: * . is zero (as done here) is traditional but probably
640: * . unnecessary. ====
641: *
642: IF( H( K+1, K ).NE.ZERO ) THEN
643: TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
644: IF( TST1.EQ.ZERO ) THEN
645: IF( K.GE.KTOP+1 )
646: $ TST1 = TST1 + ABS( H( K, K-1 ) )
647: IF( K.GE.KTOP+2 )
648: $ TST1 = TST1 + ABS( H( K, K-2 ) )
649: IF( K.GE.KTOP+3 )
650: $ TST1 = TST1 + ABS( H( K, K-3 ) )
651: IF( K.LE.KBOT-2 )
652: $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
653: IF( K.LE.KBOT-3 )
654: $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
655: IF( K.LE.KBOT-4 )
656: $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
657: END IF
658: IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
659: $ THEN
660: H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
661: H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
662: H11 = MAX( ABS( H( K+1, K+1 ) ),
663: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
664: H22 = MIN( ABS( H( K+1, K+1 ) ),
665: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
666: SCL = H11 + H12
667: TST2 = H22*( H11 / SCL )
668: *
669: IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
670: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
671: END IF
672: END IF
673: 130 CONTINUE
674: *
675: * ==== Fill in the last row of each bulge. ====
676: *
677: MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
678: DO 140 M = MTOP, MEND
679: K = KRCOL + 3*( M-1 )
680: REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
681: H( K+4, K+1 ) = -REFSUM
682: H( K+4, K+2 ) = -REFSUM*V( 2, M )
683: H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
684: 140 CONTINUE
685: *
686: * ==== End of near-the-diagonal bulge chase. ====
687: *
688: 150 CONTINUE
689: *
690: * ==== Use U (if accumulated) to update far-from-diagonal
691: * . entries in H. If required, use U to update Z as
692: * . well. ====
693: *
694: IF( ACCUM ) THEN
695: IF( WANTT ) THEN
696: JTOP = 1
697: JBOT = N
698: ELSE
699: JTOP = KTOP
700: JBOT = KBOT
701: END IF
702: IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
703: $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
704: *
705: * ==== Updates not exploiting the 2-by-2 block
706: * . structure of U. K1 and NU keep track of
707: * . the location and size of U in the special
708: * . cases of introducing bulges and chasing
709: * . bulges off the bottom. In these special
710: * . cases and in case the number of shifts
711: * . is NS = 2, there is no 2-by-2 block
712: * . structure to exploit. ====
713: *
714: K1 = MAX( 1, KTOP-INCOL )
715: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
716: *
717: * ==== Horizontal Multiply ====
718: *
719: DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
720: JLEN = MIN( NH, JBOT-JCOL+1 )
721: CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
722: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
723: $ LDWH )
724: CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
725: $ H( INCOL+K1, JCOL ), LDH )
726: 160 CONTINUE
727: *
728: * ==== Vertical multiply ====
729: *
730: DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
731: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
732: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
733: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
734: $ LDU, ZERO, WV, LDWV )
735: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
736: $ H( JROW, INCOL+K1 ), LDH )
737: 170 CONTINUE
738: *
739: * ==== Z multiply (also vertical) ====
740: *
741: IF( WANTZ ) THEN
742: DO 180 JROW = ILOZ, IHIZ, NV
743: JLEN = MIN( NV, IHIZ-JROW+1 )
744: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
745: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
746: $ LDU, ZERO, WV, LDWV )
747: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
748: $ Z( JROW, INCOL+K1 ), LDZ )
749: 180 CONTINUE
750: END IF
751: ELSE
752: *
753: * ==== Updates exploiting U's 2-by-2 block structure.
754: * . (I2, I4, J2, J4 are the last rows and columns
755: * . of the blocks.) ====
756: *
757: I2 = ( KDU+1 ) / 2
758: I4 = KDU
759: J2 = I4 - I2
760: J4 = KDU
761: *
762: * ==== KZS and KNZ deal with the band of zeros
763: * . along the diagonal of one of the triangular
764: * . blocks. ====
765: *
766: KZS = ( J4-J2 ) - ( NS+1 )
767: KNZ = NS + 1
768: *
769: * ==== Horizontal multiply ====
770: *
771: DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
772: JLEN = MIN( NH, JBOT-JCOL+1 )
773: *
774: * ==== Copy bottom of H to top+KZS of scratch ====
775: * (The first KZS rows get multiplied by zero.) ====
776: *
777: CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
778: $ LDH, WH( KZS+1, 1 ), LDWH )
779: *
780: * ==== Multiply by U21**T ====
781: *
782: CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
783: CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
784: $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
785: $ LDWH )
786: *
787: * ==== Multiply top of H by U11**T ====
788: *
789: CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
790: $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
791: *
792: * ==== Copy top of H to bottom of WH ====
793: *
794: CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
795: $ WH( I2+1, 1 ), LDWH )
796: *
797: * ==== Multiply by U21**T ====
798: *
799: CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
800: $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
801: *
802: * ==== Multiply by U22 ====
803: *
804: CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
805: $ U( J2+1, I2+1 ), LDU,
806: $ H( INCOL+1+J2, JCOL ), LDH, ONE,
807: $ WH( I2+1, 1 ), LDWH )
808: *
809: * ==== Copy it back ====
810: *
811: CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
812: $ H( INCOL+1, JCOL ), LDH )
813: 190 CONTINUE
814: *
815: * ==== Vertical multiply ====
816: *
817: DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
818: JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
819: *
820: * ==== Copy right of H to scratch (the first KZS
821: * . columns get multiplied by zero) ====
822: *
823: CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
824: $ LDH, WV( 1, 1+KZS ), LDWV )
825: *
826: * ==== Multiply by U21 ====
827: *
828: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
829: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
830: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
831: $ LDWV )
832: *
833: * ==== Multiply by U11 ====
834: *
835: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
836: $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
837: $ LDWV )
838: *
839: * ==== Copy left of H to right of scratch ====
840: *
841: CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
842: $ WV( 1, 1+I2 ), LDWV )
843: *
844: * ==== Multiply by U21 ====
845: *
846: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
847: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
848: *
849: * ==== Multiply by U22 ====
850: *
851: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
852: $ H( JROW, INCOL+1+J2 ), LDH,
853: $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
854: $ LDWV )
855: *
856: * ==== Copy it back ====
857: *
858: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
859: $ H( JROW, INCOL+1 ), LDH )
860: 200 CONTINUE
861: *
862: * ==== Multiply Z (also vertical) ====
863: *
864: IF( WANTZ ) THEN
865: DO 210 JROW = ILOZ, IHIZ, NV
866: JLEN = MIN( NV, IHIZ-JROW+1 )
867: *
868: * ==== Copy right of Z to left of scratch (first
869: * . KZS columns get multiplied by zero) ====
870: *
871: CALL DLACPY( 'ALL', JLEN, KNZ,
872: $ Z( JROW, INCOL+1+J2 ), LDZ,
873: $ WV( 1, 1+KZS ), LDWV )
874: *
875: * ==== Multiply by U12 ====
876: *
877: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
878: $ LDWV )
879: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
880: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
881: $ LDWV )
882: *
883: * ==== Multiply by U11 ====
884: *
885: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
886: $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
887: $ WV, LDWV )
888: *
889: * ==== Copy left of Z to right of scratch ====
890: *
891: CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
892: $ LDZ, WV( 1, 1+I2 ), LDWV )
893: *
894: * ==== Multiply by U21 ====
895: *
896: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
897: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
898: $ LDWV )
899: *
900: * ==== Multiply by U22 ====
901: *
902: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
903: $ Z( JROW, INCOL+1+J2 ), LDZ,
904: $ U( J2+1, I2+1 ), LDU, ONE,
905: $ WV( 1, 1+I2 ), LDWV )
906: *
907: * ==== Copy the result back to Z ====
908: *
909: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
910: $ Z( JROW, INCOL+1 ), LDZ )
911: 210 CONTINUE
912: END IF
913: END IF
914: END IF
915: 220 CONTINUE
916: *
917: * ==== End of DLAQR5 ====
918: *
919: END
CVSweb interface <joel.bertrand@systella.fr>