File:
[local] /
rpl /
lapack /
lapack /
dlaqr2.f
Revision
1.8:
download - view:
text,
annotated -
select for diffs -
revision graph
Tue Dec 21 13:53:31 2010 UTC (13 years, 6 months ago) by
bertrand
Branches:
MAIN
CVS tags:
rpl-4_1_3,
rpl-4_1_2,
rpl-4_1_1,
rpl-4_1_0,
rpl-4_0_24,
rpl-4_0_22,
rpl-4_0_21,
rpl-4_0_20,
rpl-4_0,
HEAD
Mise à jour de lapack vers la version 3.3.0.
1: SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2: $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3: $ LDT, NV, WV, LDWV, WORK, LWORK )
4: *
5: * -- LAPACK auxiliary routine (version 3.2.2) --
6: * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
7: * -- June 2010 --
8: *
9: * .. Scalar Arguments ..
10: INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11: $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12: LOGICAL WANTT, WANTZ
13: * ..
14: * .. Array Arguments ..
15: DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16: $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17: $ Z( LDZ, * )
18: * ..
19: *
20: * This subroutine is identical to DLAQR3 except that it avoids
21: * recursion by calling DLAHQR instead of DLAQR4.
22: *
23: *
24: * ******************************************************************
25: * Aggressive early deflation:
26: *
27: * This subroutine accepts as input an upper Hessenberg matrix
28: * H and performs an orthogonal similarity transformation
29: * designed to detect and deflate fully converged eigenvalues from
30: * a trailing principal submatrix. On output H has been over-
31: * written by a new Hessenberg matrix that is a perturbation of
32: * an orthogonal similarity transformation of H. It is to be
33: * hoped that the final version of H has many zero subdiagonal
34: * entries.
35: *
36: * ******************************************************************
37: * WANTT (input) LOGICAL
38: * If .TRUE., then the Hessenberg matrix H is fully updated
39: * so that the quasi-triangular Schur factor may be
40: * computed (in cooperation with the calling subroutine).
41: * If .FALSE., then only enough of H is updated to preserve
42: * the eigenvalues.
43: *
44: * WANTZ (input) LOGICAL
45: * If .TRUE., then the orthogonal matrix Z is updated so
46: * so that the orthogonal Schur factor may be computed
47: * (in cooperation with the calling subroutine).
48: * If .FALSE., then Z is not referenced.
49: *
50: * N (input) INTEGER
51: * The order of the matrix H and (if WANTZ is .TRUE.) the
52: * order of the orthogonal matrix Z.
53: *
54: * KTOP (input) INTEGER
55: * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
56: * KBOT and KTOP together determine an isolated block
57: * along the diagonal of the Hessenberg matrix.
58: *
59: * KBOT (input) INTEGER
60: * It is assumed without a check that either
61: * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
62: * determine an isolated block along the diagonal of the
63: * Hessenberg matrix.
64: *
65: * NW (input) INTEGER
66: * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
67: *
68: * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
69: * On input the initial N-by-N section of H stores the
70: * Hessenberg matrix undergoing aggressive early deflation.
71: * On output H has been transformed by an orthogonal
72: * similarity transformation, perturbed, and the returned
73: * to Hessenberg form that (it is to be hoped) has some
74: * zero subdiagonal entries.
75: *
76: * LDH (input) integer
77: * Leading dimension of H just as declared in the calling
78: * subroutine. N .LE. LDH
79: *
80: * ILOZ (input) INTEGER
81: * IHIZ (input) INTEGER
82: * Specify the rows of Z to which transformations must be
83: * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
84: *
85: * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
86: * IF WANTZ is .TRUE., then on output, the orthogonal
87: * similarity transformation mentioned above has been
88: * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
89: * If WANTZ is .FALSE., then Z is unreferenced.
90: *
91: * LDZ (input) integer
92: * The leading dimension of Z just as declared in the
93: * calling subroutine. 1 .LE. LDZ.
94: *
95: * NS (output) integer
96: * The number of unconverged (ie approximate) eigenvalues
97: * returned in SR and SI that may be used as shifts by the
98: * calling subroutine.
99: *
100: * ND (output) integer
101: * The number of converged eigenvalues uncovered by this
102: * subroutine.
103: *
104: * SR (output) DOUBLE PRECISION array, dimension (KBOT)
105: * SI (output) DOUBLE PRECISION array, dimension (KBOT)
106: * On output, the real and imaginary parts of approximate
107: * eigenvalues that may be used for shifts are stored in
108: * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109: * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110: * The real and imaginary parts of converged eigenvalues
111: * are stored in SR(KBOT-ND+1) through SR(KBOT) and
112: * SI(KBOT-ND+1) through SI(KBOT), respectively.
113: *
114: * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
115: * An NW-by-NW work array.
116: *
117: * LDV (input) integer scalar
118: * The leading dimension of V just as declared in the
119: * calling subroutine. NW .LE. LDV
120: *
121: * NH (input) integer scalar
122: * The number of columns of T. NH.GE.NW.
123: *
124: * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
125: *
126: * LDT (input) integer
127: * The leading dimension of T just as declared in the
128: * calling subroutine. NW .LE. LDT
129: *
130: * NV (input) integer
131: * The number of rows of work array WV available for
132: * workspace. NV.GE.NW.
133: *
134: * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
135: *
136: * LDWV (input) integer
137: * The leading dimension of W just as declared in the
138: * calling subroutine. NW .LE. LDV
139: *
140: * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
141: * On exit, WORK(1) is set to an estimate of the optimal value
142: * of LWORK for the given values of N, NW, KTOP and KBOT.
143: *
144: * LWORK (input) integer
145: * The dimension of the work array WORK. LWORK = 2*NW
146: * suffices, but greater efficiency may result from larger
147: * values of LWORK.
148: *
149: * If LWORK = -1, then a workspace query is assumed; DLAQR2
150: * only estimates the optimal workspace size for the given
151: * values of N, NW, KTOP and KBOT. The estimate is returned
152: * in WORK(1). No error message related to LWORK is issued
153: * by XERBLA. Neither H nor Z are accessed.
154: *
155: * ================================================================
156: * Based on contributions by
157: * Karen Braman and Ralph Byers, Department of Mathematics,
158: * University of Kansas, USA
159: *
160: * ================================================================
161: * .. Parameters ..
162: DOUBLE PRECISION ZERO, ONE
163: PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
164: * ..
165: * .. Local Scalars ..
166: DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167: $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168: INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169: $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170: $ LWKOPT
171: LOGICAL BULGE, SORTED
172: * ..
173: * .. External Functions ..
174: DOUBLE PRECISION DLAMCH
175: EXTERNAL DLAMCH
176: * ..
177: * .. External Subroutines ..
178: EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
179: $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC
180: * ..
181: * .. Intrinsic Functions ..
182: INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
183: * ..
184: * .. Executable Statements ..
185: *
186: * ==== Estimate optimal workspace. ====
187: *
188: JW = MIN( NW, KBOT-KTOP+1 )
189: IF( JW.LE.2 ) THEN
190: LWKOPT = 1
191: ELSE
192: *
193: * ==== Workspace query call to DGEHRD ====
194: *
195: CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196: LWK1 = INT( WORK( 1 ) )
197: *
198: * ==== Workspace query call to DORMHR ====
199: *
200: CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201: $ WORK, -1, INFO )
202: LWK2 = INT( WORK( 1 ) )
203: *
204: * ==== Optimal workspace ====
205: *
206: LWKOPT = JW + MAX( LWK1, LWK2 )
207: END IF
208: *
209: * ==== Quick return in case of workspace query. ====
210: *
211: IF( LWORK.EQ.-1 ) THEN
212: WORK( 1 ) = DBLE( LWKOPT )
213: RETURN
214: END IF
215: *
216: * ==== Nothing to do ...
217: * ... for an empty active block ... ====
218: NS = 0
219: ND = 0
220: WORK( 1 ) = ONE
221: IF( KTOP.GT.KBOT )
222: $ RETURN
223: * ... nor for an empty deflation window. ====
224: IF( NW.LT.1 )
225: $ RETURN
226: *
227: * ==== Machine constants ====
228: *
229: SAFMIN = DLAMCH( 'SAFE MINIMUM' )
230: SAFMAX = ONE / SAFMIN
231: CALL DLABAD( SAFMIN, SAFMAX )
232: ULP = DLAMCH( 'PRECISION' )
233: SMLNUM = SAFMIN*( DBLE( N ) / ULP )
234: *
235: * ==== Setup deflation window ====
236: *
237: JW = MIN( NW, KBOT-KTOP+1 )
238: KWTOP = KBOT - JW + 1
239: IF( KWTOP.EQ.KTOP ) THEN
240: S = ZERO
241: ELSE
242: S = H( KWTOP, KWTOP-1 )
243: END IF
244: *
245: IF( KBOT.EQ.KWTOP ) THEN
246: *
247: * ==== 1-by-1 deflation window: not much to do ====
248: *
249: SR( KWTOP ) = H( KWTOP, KWTOP )
250: SI( KWTOP ) = ZERO
251: NS = 1
252: ND = 0
253: IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
254: $ THEN
255: NS = 0
256: ND = 1
257: IF( KWTOP.GT.KTOP )
258: $ H( KWTOP, KWTOP-1 ) = ZERO
259: END IF
260: WORK( 1 ) = ONE
261: RETURN
262: END IF
263: *
264: * ==== Convert to spike-triangular form. (In case of a
265: * . rare QR failure, this routine continues to do
266: * . aggressive early deflation using that part of
267: * . the deflation window that converged using INFQR
268: * . here and there to keep track.) ====
269: *
270: CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
271: CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
272: *
273: CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
274: CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
275: $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
276: *
277: * ==== DTREXC needs a clean margin near the diagonal ====
278: *
279: DO 10 J = 1, JW - 3
280: T( J+2, J ) = ZERO
281: T( J+3, J ) = ZERO
282: 10 CONTINUE
283: IF( JW.GT.2 )
284: $ T( JW, JW-2 ) = ZERO
285: *
286: * ==== Deflation detection loop ====
287: *
288: NS = JW
289: ILST = INFQR + 1
290: 20 CONTINUE
291: IF( ILST.LE.NS ) THEN
292: IF( NS.EQ.1 ) THEN
293: BULGE = .FALSE.
294: ELSE
295: BULGE = T( NS, NS-1 ).NE.ZERO
296: END IF
297: *
298: * ==== Small spike tip test for deflation ====
299: *
300: IF( .NOT.BULGE ) THEN
301: *
302: * ==== Real eigenvalue ====
303: *
304: FOO = ABS( T( NS, NS ) )
305: IF( FOO.EQ.ZERO )
306: $ FOO = ABS( S )
307: IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
308: *
309: * ==== Deflatable ====
310: *
311: NS = NS - 1
312: ELSE
313: *
314: * ==== Undeflatable. Move it up out of the way.
315: * . (DTREXC can not fail in this case.) ====
316: *
317: IFST = NS
318: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
319: $ INFO )
320: ILST = ILST + 1
321: END IF
322: ELSE
323: *
324: * ==== Complex conjugate pair ====
325: *
326: FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
327: $ SQRT( ABS( T( NS-1, NS ) ) )
328: IF( FOO.EQ.ZERO )
329: $ FOO = ABS( S )
330: IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
331: $ MAX( SMLNUM, ULP*FOO ) ) THEN
332: *
333: * ==== Deflatable ====
334: *
335: NS = NS - 2
336: ELSE
337: *
338: * ==== Undeflatable. Move them up out of the way.
339: * . Fortunately, DTREXC does the right thing with
340: * . ILST in case of a rare exchange failure. ====
341: *
342: IFST = NS
343: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
344: $ INFO )
345: ILST = ILST + 2
346: END IF
347: END IF
348: *
349: * ==== End deflation detection loop ====
350: *
351: GO TO 20
352: END IF
353: *
354: * ==== Return to Hessenberg form ====
355: *
356: IF( NS.EQ.0 )
357: $ S = ZERO
358: *
359: IF( NS.LT.JW ) THEN
360: *
361: * ==== sorting diagonal blocks of T improves accuracy for
362: * . graded matrices. Bubble sort deals well with
363: * . exchange failures. ====
364: *
365: SORTED = .false.
366: I = NS + 1
367: 30 CONTINUE
368: IF( SORTED )
369: $ GO TO 50
370: SORTED = .true.
371: *
372: KEND = I - 1
373: I = INFQR + 1
374: IF( I.EQ.NS ) THEN
375: K = I + 1
376: ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
377: K = I + 1
378: ELSE
379: K = I + 2
380: END IF
381: 40 CONTINUE
382: IF( K.LE.KEND ) THEN
383: IF( K.EQ.I+1 ) THEN
384: EVI = ABS( T( I, I ) )
385: ELSE
386: EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
387: $ SQRT( ABS( T( I, I+1 ) ) )
388: END IF
389: *
390: IF( K.EQ.KEND ) THEN
391: EVK = ABS( T( K, K ) )
392: ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
393: EVK = ABS( T( K, K ) )
394: ELSE
395: EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
396: $ SQRT( ABS( T( K, K+1 ) ) )
397: END IF
398: *
399: IF( EVI.GE.EVK ) THEN
400: I = K
401: ELSE
402: SORTED = .false.
403: IFST = I
404: ILST = K
405: CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
406: $ INFO )
407: IF( INFO.EQ.0 ) THEN
408: I = ILST
409: ELSE
410: I = K
411: END IF
412: END IF
413: IF( I.EQ.KEND ) THEN
414: K = I + 1
415: ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
416: K = I + 1
417: ELSE
418: K = I + 2
419: END IF
420: GO TO 40
421: END IF
422: GO TO 30
423: 50 CONTINUE
424: END IF
425: *
426: * ==== Restore shift/eigenvalue array from T ====
427: *
428: I = JW
429: 60 CONTINUE
430: IF( I.GE.INFQR+1 ) THEN
431: IF( I.EQ.INFQR+1 ) THEN
432: SR( KWTOP+I-1 ) = T( I, I )
433: SI( KWTOP+I-1 ) = ZERO
434: I = I - 1
435: ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
436: SR( KWTOP+I-1 ) = T( I, I )
437: SI( KWTOP+I-1 ) = ZERO
438: I = I - 1
439: ELSE
440: AA = T( I-1, I-1 )
441: CC = T( I, I-1 )
442: BB = T( I-1, I )
443: DD = T( I, I )
444: CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
445: $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
446: $ SI( KWTOP+I-1 ), CS, SN )
447: I = I - 2
448: END IF
449: GO TO 60
450: END IF
451: *
452: IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
453: IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
454: *
455: * ==== Reflect spike back into lower triangle ====
456: *
457: CALL DCOPY( NS, V, LDV, WORK, 1 )
458: BETA = WORK( 1 )
459: CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
460: WORK( 1 ) = ONE
461: *
462: CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
463: *
464: CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
465: $ WORK( JW+1 ) )
466: CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
467: $ WORK( JW+1 ) )
468: CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
469: $ WORK( JW+1 ) )
470: *
471: CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
472: $ LWORK-JW, INFO )
473: END IF
474: *
475: * ==== Copy updated reduced window into place ====
476: *
477: IF( KWTOP.GT.1 )
478: $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
479: CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
480: CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
481: $ LDH+1 )
482: *
483: * ==== Accumulate orthogonal matrix in order update
484: * . H and Z, if requested. ====
485: *
486: IF( NS.GT.1 .AND. S.NE.ZERO )
487: $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
488: $ WORK( JW+1 ), LWORK-JW, INFO )
489: *
490: * ==== Update vertical slab in H ====
491: *
492: IF( WANTT ) THEN
493: LTOP = 1
494: ELSE
495: LTOP = KTOP
496: END IF
497: DO 70 KROW = LTOP, KWTOP - 1, NV
498: KLN = MIN( NV, KWTOP-KROW )
499: CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
500: $ LDH, V, LDV, ZERO, WV, LDWV )
501: CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
502: 70 CONTINUE
503: *
504: * ==== Update horizontal slab in H ====
505: *
506: IF( WANTT ) THEN
507: DO 80 KCOL = KBOT + 1, N, NH
508: KLN = MIN( NH, N-KCOL+1 )
509: CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
510: $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
511: CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
512: $ LDH )
513: 80 CONTINUE
514: END IF
515: *
516: * ==== Update vertical slab in Z ====
517: *
518: IF( WANTZ ) THEN
519: DO 90 KROW = ILOZ, IHIZ, NV
520: KLN = MIN( NV, IHIZ-KROW+1 )
521: CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
522: $ LDZ, V, LDV, ZERO, WV, LDWV )
523: CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
524: $ LDZ )
525: 90 CONTINUE
526: END IF
527: END IF
528: *
529: * ==== Return the number of deflations ... ====
530: *
531: ND = JW - NS
532: *
533: * ==== ... and the number of shifts. (Subtracting
534: * . INFQR from the spike length takes care
535: * . of the case of a rare QR failure while
536: * . calculating eigenvalues of the deflation
537: * . window.) ====
538: *
539: NS = NS - INFQR
540: *
541: * ==== Return optimal workspace. ====
542: *
543: WORK( 1 ) = DBLE( LWKOPT )
544: *
545: * ==== End of DLAQR2 ====
546: *
547: END
CVSweb interface <joel.bertrand@systella.fr>