1: *> \brief \b DLAQR5
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DLAQR5 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22: * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23: * LDU, NV, WV, LDWV, NH, WH, LDWH )
24: *
25: * .. Scalar Arguments ..
26: * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27: * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28: * LOGICAL WANTT, WANTZ
29: * ..
30: * .. Array Arguments ..
31: * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32: * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33: * $ Z( LDZ, * )
34: * ..
35: *
36: *
37: *> \par Purpose:
38: * =============
39: *>
40: *> \verbatim
41: *>
42: *> DLAQR5, called by DLAQR0, performs a
43: *> single small-bulge multi-shift QR sweep.
44: *> \endverbatim
45: *
46: * Arguments:
47: * ==========
48: *
49: *> \param[in] WANTT
50: *> \verbatim
51: *> WANTT is logical scalar
52: *> WANTT = .true. if the quasi-triangular Schur factor
53: *> is being computed. WANTT is set to .false. otherwise.
54: *> \endverbatim
55: *>
56: *> \param[in] WANTZ
57: *> \verbatim
58: *> WANTZ is logical scalar
59: *> WANTZ = .true. if the orthogonal Schur factor is being
60: *> computed. WANTZ is set to .false. otherwise.
61: *> \endverbatim
62: *>
63: *> \param[in] KACC22
64: *> \verbatim
65: *> KACC22 is integer with value 0, 1, or 2.
66: *> Specifies the computation mode of far-from-diagonal
67: *> orthogonal updates.
68: *> = 0: DLAQR5 does not accumulate reflections and does not
69: *> use matrix-matrix multiply to update far-from-diagonal
70: *> matrix entries.
71: *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72: *> multiply to update the far-from-diagonal matrix entries.
73: *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix
74: *> multiply to update the far-from-diagonal matrix entries,
75: *> and takes advantage of 2-by-2 block structure during
76: *> matrix multiplies.
77: *> \endverbatim
78: *>
79: *> \param[in] N
80: *> \verbatim
81: *> N is integer scalar
82: *> N is the order of the Hessenberg matrix H upon which this
83: *> subroutine operates.
84: *> \endverbatim
85: *>
86: *> \param[in] KTOP
87: *> \verbatim
88: *> KTOP is integer scalar
89: *> \endverbatim
90: *>
91: *> \param[in] KBOT
92: *> \verbatim
93: *> KBOT is integer scalar
94: *> These are the first and last rows and columns of an
95: *> isolated diagonal block upon which the QR sweep is to be
96: *> applied. It is assumed without a check that
97: *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
98: *> and
99: *> either KBOT = N or H(KBOT+1,KBOT) = 0.
100: *> \endverbatim
101: *>
102: *> \param[in] NSHFTS
103: *> \verbatim
104: *> NSHFTS is integer scalar
105: *> NSHFTS gives the number of simultaneous shifts. NSHFTS
106: *> must be positive and even.
107: *> \endverbatim
108: *>
109: *> \param[in,out] SR
110: *> \verbatim
111: *> SR is DOUBLE PRECISION array of size (NSHFTS)
112: *> \endverbatim
113: *>
114: *> \param[in,out] SI
115: *> \verbatim
116: *> SI is DOUBLE PRECISION array of size (NSHFTS)
117: *> SR contains the real parts and SI contains the imaginary
118: *> parts of the NSHFTS shifts of origin that define the
119: *> multi-shift QR sweep. On output SR and SI may be
120: *> reordered.
121: *> \endverbatim
122: *>
123: *> \param[in,out] H
124: *> \verbatim
125: *> H is DOUBLE PRECISION array of size (LDH,N)
126: *> On input H contains a Hessenberg matrix. On output a
127: *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128: *> to the isolated diagonal block in rows and columns KTOP
129: *> through KBOT.
130: *> \endverbatim
131: *>
132: *> \param[in] LDH
133: *> \verbatim
134: *> LDH is integer scalar
135: *> LDH is the leading dimension of H just as declared in the
136: *> calling procedure. LDH.GE.MAX(1,N).
137: *> \endverbatim
138: *>
139: *> \param[in] ILOZ
140: *> \verbatim
141: *> ILOZ is INTEGER
142: *> \endverbatim
143: *>
144: *> \param[in] IHIZ
145: *> \verbatim
146: *> IHIZ is INTEGER
147: *> Specify the rows of Z to which transformations must be
148: *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
149: *> \endverbatim
150: *>
151: *> \param[in,out] Z
152: *> \verbatim
153: *> Z is DOUBLE PRECISION array of size (LDZ,IHI)
154: *> If WANTZ = .TRUE., then the QR Sweep orthogonal
155: *> similarity transformation is accumulated into
156: *> Z(ILOZ:IHIZ,ILO:IHI) from the right.
157: *> If WANTZ = .FALSE., then Z is unreferenced.
158: *> \endverbatim
159: *>
160: *> \param[in] LDZ
161: *> \verbatim
162: *> LDZ is integer scalar
163: *> LDA is the leading dimension of Z just as declared in
164: *> the calling procedure. LDZ.GE.N.
165: *> \endverbatim
166: *>
167: *> \param[out] V
168: *> \verbatim
169: *> V is DOUBLE PRECISION array of size (LDV,NSHFTS/2)
170: *> \endverbatim
171: *>
172: *> \param[in] LDV
173: *> \verbatim
174: *> LDV is integer scalar
175: *> LDV is the leading dimension of V as declared in the
176: *> calling procedure. LDV.GE.3.
177: *> \endverbatim
178: *>
179: *> \param[out] U
180: *> \verbatim
181: *> U is DOUBLE PRECISION array of size
182: *> (LDU,3*NSHFTS-3)
183: *> \endverbatim
184: *>
185: *> \param[in] LDU
186: *> \verbatim
187: *> LDU is integer scalar
188: *> LDU is the leading dimension of U just as declared in the
189: *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
190: *> \endverbatim
191: *>
192: *> \param[in] NH
193: *> \verbatim
194: *> NH is integer scalar
195: *> NH is the number of columns in array WH available for
196: *> workspace. NH.GE.1.
197: *> \endverbatim
198: *>
199: *> \param[out] WH
200: *> \verbatim
201: *> WH is DOUBLE PRECISION array of size (LDWH,NH)
202: *> \endverbatim
203: *>
204: *> \param[in] LDWH
205: *> \verbatim
206: *> LDWH is integer scalar
207: *> Leading dimension of WH just as declared in the
208: *> calling procedure. LDWH.GE.3*NSHFTS-3.
209: *> \endverbatim
210: *>
211: *> \param[in] NV
212: *> \verbatim
213: *> NV is integer scalar
214: *> NV is the number of rows in WV agailable for workspace.
215: *> NV.GE.1.
216: *> \endverbatim
217: *>
218: *> \param[out] WV
219: *> \verbatim
220: *> WV is DOUBLE PRECISION array of size
221: *> (LDWV,3*NSHFTS-3)
222: *> \endverbatim
223: *>
224: *> \param[in] LDWV
225: *> \verbatim
226: *> LDWV is integer scalar
227: *> LDWV is the leading dimension of WV as declared in the
228: *> in the calling subroutine. LDWV.GE.NV.
229: *> \endverbatim
230: *
231: * Authors:
232: * ========
233: *
234: *> \author Univ. of Tennessee
235: *> \author Univ. of California Berkeley
236: *> \author Univ. of Colorado Denver
237: *> \author NAG Ltd.
238: *
239: *> \date November 2011
240: *
241: *> \ingroup doubleOTHERauxiliary
242: *
243: *> \par Contributors:
244: * ==================
245: *>
246: *> Karen Braman and Ralph Byers, Department of Mathematics,
247: *> University of Kansas, USA
248: *
249: *> \par References:
250: * ================
251: *>
252: *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253: *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254: *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255: *> 929--947, 2002.
256: *>
257: * =====================================================================
258: SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
259: $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
260: $ LDU, NV, WV, LDWV, NH, WH, LDWH )
261: *
262: * -- LAPACK auxiliary routine (version 3.4.0) --
263: * -- LAPACK is a software package provided by Univ. of Tennessee, --
264: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
265: * November 2011
266: *
267: * .. Scalar Arguments ..
268: INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
269: $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
270: LOGICAL WANTT, WANTZ
271: * ..
272: * .. Array Arguments ..
273: DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
274: $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
275: $ Z( LDZ, * )
276: * ..
277: *
278: * ================================================================
279: * .. Parameters ..
280: DOUBLE PRECISION ZERO, ONE
281: PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
282: * ..
283: * .. Local Scalars ..
284: DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
285: $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
286: $ ULP
287: INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
288: $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
289: $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
290: $ NS, NU
291: LOGICAL ACCUM, BLK22, BMP22
292: * ..
293: * .. External Functions ..
294: DOUBLE PRECISION DLAMCH
295: EXTERNAL DLAMCH
296: * ..
297: * .. Intrinsic Functions ..
298: *
299: INTRINSIC ABS, DBLE, MAX, MIN, MOD
300: * ..
301: * .. Local Arrays ..
302: DOUBLE PRECISION VT( 3 )
303: * ..
304: * .. External Subroutines ..
305: EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
306: $ DTRMM
307: * ..
308: * .. Executable Statements ..
309: *
310: * ==== If there are no shifts, then there is nothing to do. ====
311: *
312: IF( NSHFTS.LT.2 )
313: $ RETURN
314: *
315: * ==== If the active block is empty or 1-by-1, then there
316: * . is nothing to do. ====
317: *
318: IF( KTOP.GE.KBOT )
319: $ RETURN
320: *
321: * ==== Shuffle shifts into pairs of real shifts and pairs
322: * . of complex conjugate shifts assuming complex
323: * . conjugate shifts are already adjacent to one
324: * . another. ====
325: *
326: DO 10 I = 1, NSHFTS - 2, 2
327: IF( SI( I ).NE.-SI( I+1 ) ) THEN
328: *
329: SWAP = SR( I )
330: SR( I ) = SR( I+1 )
331: SR( I+1 ) = SR( I+2 )
332: SR( I+2 ) = SWAP
333: *
334: SWAP = SI( I )
335: SI( I ) = SI( I+1 )
336: SI( I+1 ) = SI( I+2 )
337: SI( I+2 ) = SWAP
338: END IF
339: 10 CONTINUE
340: *
341: * ==== NSHFTS is supposed to be even, but if it is odd,
342: * . then simply reduce it by one. The shuffle above
343: * . ensures that the dropped shift is real and that
344: * . the remaining shifts are paired. ====
345: *
346: NS = NSHFTS - MOD( NSHFTS, 2 )
347: *
348: * ==== Machine constants for deflation ====
349: *
350: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
351: SAFMAX = ONE / SAFMIN
352: CALL DLABAD( SAFMIN, SAFMAX )
353: ULP = DLAMCH( 'PRECISION' )
354: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
355: *
356: * ==== Use accumulated reflections to update far-from-diagonal
357: * . entries ? ====
358: *
359: ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
360: *
361: * ==== If so, exploit the 2-by-2 block structure? ====
362: *
363: BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
364: *
365: * ==== clear trash ====
366: *
367: IF( KTOP+2.LE.KBOT )
368: $ H( KTOP+2, KTOP ) = ZERO
369: *
370: * ==== NBMPS = number of 2-shift bulges in the chain ====
371: *
372: NBMPS = NS / 2
373: *
374: * ==== KDU = width of slab ====
375: *
376: KDU = 6*NBMPS - 3
377: *
378: * ==== Create and chase chains of NBMPS bulges ====
379: *
380: DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
381: NDCOL = INCOL + KDU
382: IF( ACCUM )
383: $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
384: *
385: * ==== Near-the-diagonal bulge chase. The following loop
386: * . performs the near-the-diagonal part of a small bulge
387: * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
388: * . chunk extends from column INCOL to column NDCOL
389: * . (including both column INCOL and column NDCOL). The
390: * . following loop chases a 3*NBMPS column long chain of
391: * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
392: * . may be less than KTOP and and NDCOL may be greater than
393: * . KBOT indicating phantom columns from which to chase
394: * . bulges before they are actually introduced or to which
395: * . to chase bulges beyond column KBOT.) ====
396: *
397: DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
398: *
399: * ==== Bulges number MTOP to MBOT are active double implicit
400: * . shift bulges. There may or may not also be small
401: * . 2-by-2 bulge, if there is room. The inactive bulges
402: * . (if any) must wait until the active bulges have moved
403: * . down the diagonal to make room. The phantom matrix
404: * . paradigm described above helps keep track. ====
405: *
406: MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
407: MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
408: M22 = MBOT + 1
409: BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
410: $ ( KBOT-2 )
411: *
412: * ==== Generate reflections to chase the chain right
413: * . one column. (The minimum value of K is KTOP-1.) ====
414: *
415: DO 20 M = MTOP, MBOT
416: K = KRCOL + 3*( M-1 )
417: IF( K.EQ.KTOP-1 ) THEN
418: CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
419: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
420: $ V( 1, M ) )
421: ALPHA = V( 1, M )
422: CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
423: ELSE
424: BETA = H( K+1, K )
425: V( 2, M ) = H( K+2, K )
426: V( 3, M ) = H( K+3, K )
427: CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
428: *
429: * ==== A Bulge may collapse because of vigilant
430: * . deflation or destructive underflow. In the
431: * . underflow case, try the two-small-subdiagonals
432: * . trick to try to reinflate the bulge. ====
433: *
434: IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
435: $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
436: *
437: * ==== Typical case: not collapsed (yet). ====
438: *
439: H( K+1, K ) = BETA
440: H( K+2, K ) = ZERO
441: H( K+3, K ) = ZERO
442: ELSE
443: *
444: * ==== Atypical case: collapsed. Attempt to
445: * . reintroduce ignoring H(K+1,K) and H(K+2,K).
446: * . If the fill resulting from the new
447: * . reflector is too large, then abandon it.
448: * . Otherwise, use the new one. ====
449: *
450: CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
451: $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
452: $ VT )
453: ALPHA = VT( 1 )
454: CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
455: REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
456: $ H( K+2, K ) )
457: *
458: IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
459: $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
460: $ ( ABS( H( K, K ) )+ABS( H( K+1,
461: $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
462: *
463: * ==== Starting a new bulge here would
464: * . create non-negligible fill. Use
465: * . the old one with trepidation. ====
466: *
467: H( K+1, K ) = BETA
468: H( K+2, K ) = ZERO
469: H( K+3, K ) = ZERO
470: ELSE
471: *
472: * ==== Stating a new bulge here would
473: * . create only negligible fill.
474: * . Replace the old reflector with
475: * . the new one. ====
476: *
477: H( K+1, K ) = H( K+1, K ) - REFSUM
478: H( K+2, K ) = ZERO
479: H( K+3, K ) = ZERO
480: V( 1, M ) = VT( 1 )
481: V( 2, M ) = VT( 2 )
482: V( 3, M ) = VT( 3 )
483: END IF
484: END IF
485: END IF
486: 20 CONTINUE
487: *
488: * ==== Generate a 2-by-2 reflection, if needed. ====
489: *
490: K = KRCOL + 3*( M22-1 )
491: IF( BMP22 ) THEN
492: IF( K.EQ.KTOP-1 ) THEN
493: CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
494: $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
495: $ V( 1, M22 ) )
496: BETA = V( 1, M22 )
497: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
498: ELSE
499: BETA = H( K+1, K )
500: V( 2, M22 ) = H( K+2, K )
501: CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
502: H( K+1, K ) = BETA
503: H( K+2, K ) = ZERO
504: END IF
505: END IF
506: *
507: * ==== Multiply H by reflections from the left ====
508: *
509: IF( ACCUM ) THEN
510: JBOT = MIN( NDCOL, KBOT )
511: ELSE IF( WANTT ) THEN
512: JBOT = N
513: ELSE
514: JBOT = KBOT
515: END IF
516: DO 40 J = MAX( KTOP, KRCOL ), JBOT
517: MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
518: DO 30 M = MTOP, MEND
519: K = KRCOL + 3*( M-1 )
520: REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
521: $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
522: H( K+1, J ) = H( K+1, J ) - REFSUM
523: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
524: H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
525: 30 CONTINUE
526: 40 CONTINUE
527: IF( BMP22 ) THEN
528: K = KRCOL + 3*( M22-1 )
529: DO 50 J = MAX( K+1, KTOP ), JBOT
530: REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
531: $ H( K+2, J ) )
532: H( K+1, J ) = H( K+1, J ) - REFSUM
533: H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
534: 50 CONTINUE
535: END IF
536: *
537: * ==== Multiply H by reflections from the right.
538: * . Delay filling in the last row until the
539: * . vigilant deflation check is complete. ====
540: *
541: IF( ACCUM ) THEN
542: JTOP = MAX( KTOP, INCOL )
543: ELSE IF( WANTT ) THEN
544: JTOP = 1
545: ELSE
546: JTOP = KTOP
547: END IF
548: DO 90 M = MTOP, MBOT
549: IF( V( 1, M ).NE.ZERO ) THEN
550: K = KRCOL + 3*( M-1 )
551: DO 60 J = JTOP, MIN( KBOT, K+3 )
552: REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
553: $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
554: H( J, K+1 ) = H( J, K+1 ) - REFSUM
555: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
556: H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
557: 60 CONTINUE
558: *
559: IF( ACCUM ) THEN
560: *
561: * ==== Accumulate U. (If necessary, update Z later
562: * . with with an efficient matrix-matrix
563: * . multiply.) ====
564: *
565: KMS = K - INCOL
566: DO 70 J = MAX( 1, KTOP-INCOL ), KDU
567: REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
568: $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
569: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
570: U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
571: U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
572: 70 CONTINUE
573: ELSE IF( WANTZ ) THEN
574: *
575: * ==== U is not accumulated, so update Z
576: * . now by multiplying by reflections
577: * . from the right. ====
578: *
579: DO 80 J = ILOZ, IHIZ
580: REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
581: $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
582: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
583: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
584: Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
585: 80 CONTINUE
586: END IF
587: END IF
588: 90 CONTINUE
589: *
590: * ==== Special case: 2-by-2 reflection (if needed) ====
591: *
592: K = KRCOL + 3*( M22-1 )
593: IF( BMP22 ) THEN
594: IF ( V( 1, M22 ).NE.ZERO ) THEN
595: DO 100 J = JTOP, MIN( KBOT, K+3 )
596: REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
597: $ H( J, K+2 ) )
598: H( J, K+1 ) = H( J, K+1 ) - REFSUM
599: H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
600: 100 CONTINUE
601: *
602: IF( ACCUM ) THEN
603: KMS = K - INCOL
604: DO 110 J = MAX( 1, KTOP-INCOL ), KDU
605: REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
606: $ V( 2, M22 )*U( J, KMS+2 ) )
607: U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
608: U( J, KMS+2 ) = U( J, KMS+2 ) -
609: $ REFSUM*V( 2, M22 )
610: 110 CONTINUE
611: ELSE IF( WANTZ ) THEN
612: DO 120 J = ILOZ, IHIZ
613: REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
614: $ Z( J, K+2 ) )
615: Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
616: Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
617: 120 CONTINUE
618: END IF
619: END IF
620: END IF
621: *
622: * ==== Vigilant deflation check ====
623: *
624: MSTART = MTOP
625: IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
626: $ MSTART = MSTART + 1
627: MEND = MBOT
628: IF( BMP22 )
629: $ MEND = MEND + 1
630: IF( KRCOL.EQ.KBOT-2 )
631: $ MEND = MEND + 1
632: DO 130 M = MSTART, MEND
633: K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
634: *
635: * ==== The following convergence test requires that
636: * . the tradition small-compared-to-nearby-diagonals
637: * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
638: * . criteria both be satisfied. The latter improves
639: * . accuracy in some examples. Falling back on an
640: * . alternate convergence criterion when TST1 or TST2
641: * . is zero (as done here) is traditional but probably
642: * . unnecessary. ====
643: *
644: IF( H( K+1, K ).NE.ZERO ) THEN
645: TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
646: IF( TST1.EQ.ZERO ) THEN
647: IF( K.GE.KTOP+1 )
648: $ TST1 = TST1 + ABS( H( K, K-1 ) )
649: IF( K.GE.KTOP+2 )
650: $ TST1 = TST1 + ABS( H( K, K-2 ) )
651: IF( K.GE.KTOP+3 )
652: $ TST1 = TST1 + ABS( H( K, K-3 ) )
653: IF( K.LE.KBOT-2 )
654: $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
655: IF( K.LE.KBOT-3 )
656: $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
657: IF( K.LE.KBOT-4 )
658: $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
659: END IF
660: IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
661: $ THEN
662: H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
663: H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
664: H11 = MAX( ABS( H( K+1, K+1 ) ),
665: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
666: H22 = MIN( ABS( H( K+1, K+1 ) ),
667: $ ABS( H( K, K )-H( K+1, K+1 ) ) )
668: SCL = H11 + H12
669: TST2 = H22*( H11 / SCL )
670: *
671: IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
672: $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
673: END IF
674: END IF
675: 130 CONTINUE
676: *
677: * ==== Fill in the last row of each bulge. ====
678: *
679: MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
680: DO 140 M = MTOP, MEND
681: K = KRCOL + 3*( M-1 )
682: REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
683: H( K+4, K+1 ) = -REFSUM
684: H( K+4, K+2 ) = -REFSUM*V( 2, M )
685: H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
686: 140 CONTINUE
687: *
688: * ==== End of near-the-diagonal bulge chase. ====
689: *
690: 150 CONTINUE
691: *
692: * ==== Use U (if accumulated) to update far-from-diagonal
693: * . entries in H. If required, use U to update Z as
694: * . well. ====
695: *
696: IF( ACCUM ) THEN
697: IF( WANTT ) THEN
698: JTOP = 1
699: JBOT = N
700: ELSE
701: JTOP = KTOP
702: JBOT = KBOT
703: END IF
704: IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
705: $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
706: *
707: * ==== Updates not exploiting the 2-by-2 block
708: * . structure of U. K1 and NU keep track of
709: * . the location and size of U in the special
710: * . cases of introducing bulges and chasing
711: * . bulges off the bottom. In these special
712: * . cases and in case the number of shifts
713: * . is NS = 2, there is no 2-by-2 block
714: * . structure to exploit. ====
715: *
716: K1 = MAX( 1, KTOP-INCOL )
717: NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
718: *
719: * ==== Horizontal Multiply ====
720: *
721: DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
722: JLEN = MIN( NH, JBOT-JCOL+1 )
723: CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
724: $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
725: $ LDWH )
726: CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
727: $ H( INCOL+K1, JCOL ), LDH )
728: 160 CONTINUE
729: *
730: * ==== Vertical multiply ====
731: *
732: DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
733: JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
734: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
735: $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
736: $ LDU, ZERO, WV, LDWV )
737: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
738: $ H( JROW, INCOL+K1 ), LDH )
739: 170 CONTINUE
740: *
741: * ==== Z multiply (also vertical) ====
742: *
743: IF( WANTZ ) THEN
744: DO 180 JROW = ILOZ, IHIZ, NV
745: JLEN = MIN( NV, IHIZ-JROW+1 )
746: CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
747: $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
748: $ LDU, ZERO, WV, LDWV )
749: CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
750: $ Z( JROW, INCOL+K1 ), LDZ )
751: 180 CONTINUE
752: END IF
753: ELSE
754: *
755: * ==== Updates exploiting U's 2-by-2 block structure.
756: * . (I2, I4, J2, J4 are the last rows and columns
757: * . of the blocks.) ====
758: *
759: I2 = ( KDU+1 ) / 2
760: I4 = KDU
761: J2 = I4 - I2
762: J4 = KDU
763: *
764: * ==== KZS and KNZ deal with the band of zeros
765: * . along the diagonal of one of the triangular
766: * . blocks. ====
767: *
768: KZS = ( J4-J2 ) - ( NS+1 )
769: KNZ = NS + 1
770: *
771: * ==== Horizontal multiply ====
772: *
773: DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
774: JLEN = MIN( NH, JBOT-JCOL+1 )
775: *
776: * ==== Copy bottom of H to top+KZS of scratch ====
777: * (The first KZS rows get multiplied by zero.) ====
778: *
779: CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
780: $ LDH, WH( KZS+1, 1 ), LDWH )
781: *
782: * ==== Multiply by U21**T ====
783: *
784: CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
785: CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
786: $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
787: $ LDWH )
788: *
789: * ==== Multiply top of H by U11**T ====
790: *
791: CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
792: $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
793: *
794: * ==== Copy top of H to bottom of WH ====
795: *
796: CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
797: $ WH( I2+1, 1 ), LDWH )
798: *
799: * ==== Multiply by U21**T ====
800: *
801: CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
802: $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
803: *
804: * ==== Multiply by U22 ====
805: *
806: CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
807: $ U( J2+1, I2+1 ), LDU,
808: $ H( INCOL+1+J2, JCOL ), LDH, ONE,
809: $ WH( I2+1, 1 ), LDWH )
810: *
811: * ==== Copy it back ====
812: *
813: CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
814: $ H( INCOL+1, JCOL ), LDH )
815: 190 CONTINUE
816: *
817: * ==== Vertical multiply ====
818: *
819: DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
820: JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
821: *
822: * ==== Copy right of H to scratch (the first KZS
823: * . columns get multiplied by zero) ====
824: *
825: CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
826: $ LDH, WV( 1, 1+KZS ), LDWV )
827: *
828: * ==== Multiply by U21 ====
829: *
830: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
831: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
832: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
833: $ LDWV )
834: *
835: * ==== Multiply by U11 ====
836: *
837: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
838: $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
839: $ LDWV )
840: *
841: * ==== Copy left of H to right of scratch ====
842: *
843: CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
844: $ WV( 1, 1+I2 ), LDWV )
845: *
846: * ==== Multiply by U21 ====
847: *
848: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
849: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
850: *
851: * ==== Multiply by U22 ====
852: *
853: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
854: $ H( JROW, INCOL+1+J2 ), LDH,
855: $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
856: $ LDWV )
857: *
858: * ==== Copy it back ====
859: *
860: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
861: $ H( JROW, INCOL+1 ), LDH )
862: 200 CONTINUE
863: *
864: * ==== Multiply Z (also vertical) ====
865: *
866: IF( WANTZ ) THEN
867: DO 210 JROW = ILOZ, IHIZ, NV
868: JLEN = MIN( NV, IHIZ-JROW+1 )
869: *
870: * ==== Copy right of Z to left of scratch (first
871: * . KZS columns get multiplied by zero) ====
872: *
873: CALL DLACPY( 'ALL', JLEN, KNZ,
874: $ Z( JROW, INCOL+1+J2 ), LDZ,
875: $ WV( 1, 1+KZS ), LDWV )
876: *
877: * ==== Multiply by U12 ====
878: *
879: CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
880: $ LDWV )
881: CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
882: $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
883: $ LDWV )
884: *
885: * ==== Multiply by U11 ====
886: *
887: CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
888: $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
889: $ WV, LDWV )
890: *
891: * ==== Copy left of Z to right of scratch ====
892: *
893: CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
894: $ LDZ, WV( 1, 1+I2 ), LDWV )
895: *
896: * ==== Multiply by U21 ====
897: *
898: CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
899: $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
900: $ LDWV )
901: *
902: * ==== Multiply by U22 ====
903: *
904: CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
905: $ Z( JROW, INCOL+1+J2 ), LDZ,
906: $ U( J2+1, I2+1 ), LDU, ONE,
907: $ WV( 1, 1+I2 ), LDWV )
908: *
909: * ==== Copy the result back to Z ====
910: *
911: CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
912: $ Z( JROW, INCOL+1 ), LDZ )
913: 210 CONTINUE
914: END IF
915: END IF
916: END IF
917: 220 CONTINUE
918: *
919: * ==== End of DLAQR5 ====
920: *
921: END
CVSweb interface <joel.bertrand@systella.fr>