Annotation of rpl/lapack/lapack/dlaqr3.f, revision 1.6
1.1 bertrand 1: SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2: $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3: $ LDT, NV, WV, LDWV, WORK, LWORK )
4: *
1.5 bertrand 5: * -- LAPACK auxiliary routine (version 3.2.2) --
1.1 bertrand 6: * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
1.5 bertrand 7: * -- June 2010 --
1.1 bertrand 8: *
9: * .. Scalar Arguments ..
10: INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11: $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12: LOGICAL WANTT, WANTZ
13: * ..
14: * .. Array Arguments ..
15: DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16: $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17: $ Z( LDZ, * )
18: * ..
19: *
20: * ******************************************************************
21: * Aggressive early deflation:
22: *
23: * This subroutine accepts as input an upper Hessenberg matrix
24: * H and performs an orthogonal similarity transformation
25: * designed to detect and deflate fully converged eigenvalues from
26: * a trailing principal submatrix. On output H has been over-
27: * written by a new Hessenberg matrix that is a perturbation of
28: * an orthogonal similarity transformation of H. It is to be
29: * hoped that the final version of H has many zero subdiagonal
30: * entries.
31: *
32: * ******************************************************************
33: * WANTT (input) LOGICAL
34: * If .TRUE., then the Hessenberg matrix H is fully updated
35: * so that the quasi-triangular Schur factor may be
36: * computed (in cooperation with the calling subroutine).
37: * If .FALSE., then only enough of H is updated to preserve
38: * the eigenvalues.
39: *
40: * WANTZ (input) LOGICAL
41: * If .TRUE., then the orthogonal matrix Z is updated so
42: * so that the orthogonal Schur factor may be computed
43: * (in cooperation with the calling subroutine).
44: * If .FALSE., then Z is not referenced.
45: *
46: * N (input) INTEGER
47: * The order of the matrix H and (if WANTZ is .TRUE.) the
48: * order of the orthogonal matrix Z.
49: *
50: * KTOP (input) INTEGER
51: * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
52: * KBOT and KTOP together determine an isolated block
53: * along the diagonal of the Hessenberg matrix.
54: *
55: * KBOT (input) INTEGER
56: * It is assumed without a check that either
57: * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
58: * determine an isolated block along the diagonal of the
59: * Hessenberg matrix.
60: *
61: * NW (input) INTEGER
62: * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
63: *
64: * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
65: * On input the initial N-by-N section of H stores the
66: * Hessenberg matrix undergoing aggressive early deflation.
67: * On output H has been transformed by an orthogonal
68: * similarity transformation, perturbed, and the returned
69: * to Hessenberg form that (it is to be hoped) has some
70: * zero subdiagonal entries.
71: *
72: * LDH (input) integer
73: * Leading dimension of H just as declared in the calling
74: * subroutine. N .LE. LDH
75: *
76: * ILOZ (input) INTEGER
77: * IHIZ (input) INTEGER
78: * Specify the rows of Z to which transformations must be
79: * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
80: *
81: * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
82: * IF WANTZ is .TRUE., then on output, the orthogonal
83: * similarity transformation mentioned above has been
84: * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
85: * If WANTZ is .FALSE., then Z is unreferenced.
86: *
87: * LDZ (input) integer
88: * The leading dimension of Z just as declared in the
89: * calling subroutine. 1 .LE. LDZ.
90: *
91: * NS (output) integer
92: * The number of unconverged (ie approximate) eigenvalues
93: * returned in SR and SI that may be used as shifts by the
94: * calling subroutine.
95: *
96: * ND (output) integer
97: * The number of converged eigenvalues uncovered by this
98: * subroutine.
99: *
1.5 bertrand 100: * SR (output) DOUBLE PRECISION array, dimension (KBOT)
101: * SI (output) DOUBLE PRECISION array, dimension (KBOT)
1.1 bertrand 102: * On output, the real and imaginary parts of approximate
103: * eigenvalues that may be used for shifts are stored in
104: * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
105: * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
106: * The real and imaginary parts of converged eigenvalues
107: * are stored in SR(KBOT-ND+1) through SR(KBOT) and
108: * SI(KBOT-ND+1) through SI(KBOT), respectively.
109: *
110: * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
111: * An NW-by-NW work array.
112: *
113: * LDV (input) integer scalar
114: * The leading dimension of V just as declared in the
115: * calling subroutine. NW .LE. LDV
116: *
117: * NH (input) integer scalar
118: * The number of columns of T. NH.GE.NW.
119: *
120: * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
121: *
122: * LDT (input) integer
123: * The leading dimension of T just as declared in the
124: * calling subroutine. NW .LE. LDT
125: *
126: * NV (input) integer
127: * The number of rows of work array WV available for
128: * workspace. NV.GE.NW.
129: *
130: * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
131: *
132: * LDWV (input) integer
133: * The leading dimension of W just as declared in the
134: * calling subroutine. NW .LE. LDV
135: *
1.5 bertrand 136: * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
1.1 bertrand 137: * On exit, WORK(1) is set to an estimate of the optimal value
138: * of LWORK for the given values of N, NW, KTOP and KBOT.
139: *
140: * LWORK (input) integer
141: * The dimension of the work array WORK. LWORK = 2*NW
142: * suffices, but greater efficiency may result from larger
143: * values of LWORK.
144: *
145: * If LWORK = -1, then a workspace query is assumed; DLAQR3
146: * only estimates the optimal workspace size for the given
147: * values of N, NW, KTOP and KBOT. The estimate is returned
148: * in WORK(1). No error message related to LWORK is issued
149: * by XERBLA. Neither H nor Z are accessed.
150: *
151: * ================================================================
152: * Based on contributions by
153: * Karen Braman and Ralph Byers, Department of Mathematics,
154: * University of Kansas, USA
155: *
156: * ================================================================
157: * .. Parameters ..
158: DOUBLE PRECISION ZERO, ONE
159: PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
160: * ..
161: * .. Local Scalars ..
162: DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
163: $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
164: INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
165: $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166: $ LWKOPT, NMIN
167: LOGICAL BULGE, SORTED
168: * ..
169: * .. External Functions ..
170: DOUBLE PRECISION DLAMCH
171: INTEGER ILAENV
172: EXTERNAL DLAMCH, ILAENV
173: * ..
174: * .. External Subroutines ..
175: EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
176: $ DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR,
177: $ DTREXC
178: * ..
179: * .. Intrinsic Functions ..
180: INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
181: * ..
182: * .. Executable Statements ..
183: *
184: * ==== Estimate optimal workspace. ====
185: *
186: JW = MIN( NW, KBOT-KTOP+1 )
187: IF( JW.LE.2 ) THEN
188: LWKOPT = 1
189: ELSE
190: *
191: * ==== Workspace query call to DGEHRD ====
192: *
193: CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
194: LWK1 = INT( WORK( 1 ) )
195: *
196: * ==== Workspace query call to DORMHR ====
197: *
198: CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
199: $ WORK, -1, INFO )
200: LWK2 = INT( WORK( 1 ) )
201: *
202: * ==== Workspace query call to DLAQR4 ====
203: *
204: CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW,
205: $ V, LDV, WORK, -1, INFQR )
206: LWK3 = INT( WORK( 1 ) )
207: *
208: * ==== Optimal workspace ====
209: *
210: LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
211: END IF
212: *
213: * ==== Quick return in case of workspace query. ====
214: *
215: IF( LWORK.EQ.-1 ) THEN
216: WORK( 1 ) = DBLE( LWKOPT )
217: RETURN
218: END IF
219: *
220: * ==== Nothing to do ...
221: * ... for an empty active block ... ====
222: NS = 0
223: ND = 0
224: WORK( 1 ) = ONE
225: IF( KTOP.GT.KBOT )
226: $ RETURN
227: * ... nor for an empty deflation window. ====
228: IF( NW.LT.1 )
229: $ RETURN
230: *
231: * ==== Machine constants ====
232: *
233: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
234: SAFMAX = ONE / SAFMIN
235: CALL DLABAD( SAFMIN, SAFMAX )
236: ULP = DLAMCH( 'PRECISION' )
237: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
238: *
239: * ==== Setup deflation window ====
240: *
241: JW = MIN( NW, KBOT-KTOP+1 )
242: KWTOP = KBOT - JW + 1
243: IF( KWTOP.EQ.KTOP ) THEN
244: S = ZERO
245: ELSE
246: S = H( KWTOP, KWTOP-1 )
247: END IF
248: *
249: IF( KBOT.EQ.KWTOP ) THEN
250: *
251: * ==== 1-by-1 deflation window: not much to do ====
252: *
253: SR( KWTOP ) = H( KWTOP, KWTOP )
254: SI( KWTOP ) = ZERO
255: NS = 1
256: ND = 0
257: IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
258: $ THEN
259: NS = 0
260: ND = 1
261: IF( KWTOP.GT.KTOP )
262: $ H( KWTOP, KWTOP-1 ) = ZERO
263: END IF
264: WORK( 1 ) = ONE
265: RETURN
266: END IF
267: *
268: * ==== Convert to spike-triangular form. (In case of a
269: * . rare QR failure, this routine continues to do
270: * . aggressive early deflation using that part of
271: * . the deflation window that converged using INFQR
272: * . here and there to keep track.) ====
273: *
274: CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
275: CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
276: *
277: CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
278: NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK )
279: IF( JW.GT.NMIN ) THEN
280: CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
281: $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )
282: ELSE
283: CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
284: $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
285: END IF
286: *
287: * ==== DTREXC needs a clean margin near the diagonal ====
288: *
289: DO 10 J = 1, JW - 3
290: T( J+2, J ) = ZERO
291: T( J+3, J ) = ZERO
292: 10 CONTINUE
293: IF( JW.GT.2 )
294: $ T( JW, JW-2 ) = ZERO
295: *
296: * ==== Deflation detection loop ====
297: *
298: NS = JW
299: ILST = INFQR + 1
300: 20 CONTINUE
301: IF( ILST.LE.NS ) THEN
302: IF( NS.EQ.1 ) THEN
303: BULGE = .FALSE.
304: ELSE
305: BULGE = T( NS, NS-1 ).NE.ZERO
306: END IF
307: *
308: * ==== Small spike tip test for deflation ====
309: *
310: IF( .NOT.BULGE ) THEN
311: *
312: * ==== Real eigenvalue ====
313: *
314: FOO = ABS( T( NS, NS ) )
315: IF( FOO.EQ.ZERO )
316: $ FOO = ABS( S )
317: IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
318: *
319: * ==== Deflatable ====
320: *
321: NS = NS - 1
322: ELSE
323: *
324: * ==== Undeflatable. Move it up out of the way.
325: * . (DTREXC can not fail in this case.) ====
326: *
327: IFST = NS
328: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
329: $ INFO )
330: ILST = ILST + 1
331: END IF
332: ELSE
333: *
334: * ==== Complex conjugate pair ====
335: *
336: FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
337: $ SQRT( ABS( T( NS-1, NS ) ) )
338: IF( FOO.EQ.ZERO )
339: $ FOO = ABS( S )
340: IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
341: $ MAX( SMLNUM, ULP*FOO ) ) THEN
342: *
343: * ==== Deflatable ====
344: *
345: NS = NS - 2
346: ELSE
347: *
348: * ==== Undeflatable. Move them up out of the way.
349: * . Fortunately, DTREXC does the right thing with
350: * . ILST in case of a rare exchange failure. ====
351: *
352: IFST = NS
353: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
354: $ INFO )
355: ILST = ILST + 2
356: END IF
357: END IF
358: *
359: * ==== End deflation detection loop ====
360: *
361: GO TO 20
362: END IF
363: *
364: * ==== Return to Hessenberg form ====
365: *
366: IF( NS.EQ.0 )
367: $ S = ZERO
368: *
369: IF( NS.LT.JW ) THEN
370: *
371: * ==== sorting diagonal blocks of T improves accuracy for
372: * . graded matrices. Bubble sort deals well with
373: * . exchange failures. ====
374: *
375: SORTED = .false.
376: I = NS + 1
377: 30 CONTINUE
378: IF( SORTED )
379: $ GO TO 50
380: SORTED = .true.
381: *
382: KEND = I - 1
383: I = INFQR + 1
384: IF( I.EQ.NS ) THEN
385: K = I + 1
386: ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
387: K = I + 1
388: ELSE
389: K = I + 2
390: END IF
391: 40 CONTINUE
392: IF( K.LE.KEND ) THEN
393: IF( K.EQ.I+1 ) THEN
394: EVI = ABS( T( I, I ) )
395: ELSE
396: EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
397: $ SQRT( ABS( T( I, I+1 ) ) )
398: END IF
399: *
400: IF( K.EQ.KEND ) THEN
401: EVK = ABS( T( K, K ) )
402: ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
403: EVK = ABS( T( K, K ) )
404: ELSE
405: EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
406: $ SQRT( ABS( T( K, K+1 ) ) )
407: END IF
408: *
409: IF( EVI.GE.EVK ) THEN
410: I = K
411: ELSE
412: SORTED = .false.
413: IFST = I
414: ILST = K
415: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
416: $ INFO )
417: IF( INFO.EQ.0 ) THEN
418: I = ILST
419: ELSE
420: I = K
421: END IF
422: END IF
423: IF( I.EQ.KEND ) THEN
424: K = I + 1
425: ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
426: K = I + 1
427: ELSE
428: K = I + 2
429: END IF
430: GO TO 40
431: END IF
432: GO TO 30
433: 50 CONTINUE
434: END IF
435: *
436: * ==== Restore shift/eigenvalue array from T ====
437: *
438: I = JW
439: 60 CONTINUE
440: IF( I.GE.INFQR+1 ) THEN
441: IF( I.EQ.INFQR+1 ) THEN
442: SR( KWTOP+I-1 ) = T( I, I )
443: SI( KWTOP+I-1 ) = ZERO
444: I = I - 1
445: ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
446: SR( KWTOP+I-1 ) = T( I, I )
447: SI( KWTOP+I-1 ) = ZERO
448: I = I - 1
449: ELSE
450: AA = T( I-1, I-1 )
451: CC = T( I, I-1 )
452: BB = T( I-1, I )
453: DD = T( I, I )
454: CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
455: $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
456: $ SI( KWTOP+I-1 ), CS, SN )
457: I = I - 2
458: END IF
459: GO TO 60
460: END IF
461: *
462: IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
463: IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
464: *
465: * ==== Reflect spike back into lower triangle ====
466: *
467: CALL DCOPY( NS, V, LDV, WORK, 1 )
468: BETA = WORK( 1 )
469: CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
470: WORK( 1 ) = ONE
471: *
472: CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
473: *
474: CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
475: $ WORK( JW+1 ) )
476: CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
477: $ WORK( JW+1 ) )
478: CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
479: $ WORK( JW+1 ) )
480: *
481: CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
482: $ LWORK-JW, INFO )
483: END IF
484: *
485: * ==== Copy updated reduced window into place ====
486: *
487: IF( KWTOP.GT.1 )
488: $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
489: CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
490: CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
491: $ LDH+1 )
492: *
493: * ==== Accumulate orthogonal matrix in order update
494: * . H and Z, if requested. ====
495: *
496: IF( NS.GT.1 .AND. S.NE.ZERO )
497: $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
498: $ WORK( JW+1 ), LWORK-JW, INFO )
499: *
500: * ==== Update vertical slab in H ====
501: *
502: IF( WANTT ) THEN
503: LTOP = 1
504: ELSE
505: LTOP = KTOP
506: END IF
507: DO 70 KROW = LTOP, KWTOP - 1, NV
508: KLN = MIN( NV, KWTOP-KROW )
509: CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
510: $ LDH, V, LDV, ZERO, WV, LDWV )
511: CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
512: 70 CONTINUE
513: *
514: * ==== Update horizontal slab in H ====
515: *
516: IF( WANTT ) THEN
517: DO 80 KCOL = KBOT + 1, N, NH
518: KLN = MIN( NH, N-KCOL+1 )
519: CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
520: $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
521: CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
522: $ LDH )
523: 80 CONTINUE
524: END IF
525: *
526: * ==== Update vertical slab in Z ====
527: *
528: IF( WANTZ ) THEN
529: DO 90 KROW = ILOZ, IHIZ, NV
530: KLN = MIN( NV, IHIZ-KROW+1 )
531: CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
532: $ LDZ, V, LDV, ZERO, WV, LDWV )
533: CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
534: $ LDZ )
535: 90 CONTINUE
536: END IF
537: END IF
538: *
539: * ==== Return the number of deflations ... ====
540: *
541: ND = JW - NS
542: *
543: * ==== ... and the number of shifts. (Subtracting
544: * . INFQR from the spike length takes care
545: * . of the case of a rare QR failure while
546: * . calculating eigenvalues of the deflation
547: * . window.) ====
548: *
549: NS = NS - INFQR
550: *
551: * ==== Return optimal workspace. ====
552: *
553: WORK( 1 ) = DBLE( LWKOPT )
554: *
555: * ==== End of DLAQR3 ====
556: *
557: END
CVSweb interface <joel.bertrand@systella.fr>