1: *> \brief \b DLAED2
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DLAED2 + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
22: * Q2, INDX, INDXC, INDXP, COLTYP, INFO )
23: *
24: * .. Scalar Arguments ..
25: * INTEGER INFO, K, LDQ, N, N1
26: * DOUBLE PRECISION RHO
27: * ..
28: * .. Array Arguments ..
29: * INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
30: * $ INDXQ( * )
31: * DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
32: * $ W( * ), Z( * )
33: * ..
34: *
35: *
36: *> \par Purpose:
37: * =============
38: *>
39: *> \verbatim
40: *>
41: *> DLAED2 merges the two sets of eigenvalues together into a single
42: *> sorted set. Then it tries to deflate the size of the problem.
43: *> There are two ways in which deflation can occur: when two or more
44: *> eigenvalues are close together or if there is a tiny entry in the
45: *> Z vector. For each such occurrence the order of the related secular
46: *> equation problem is reduced by one.
47: *> \endverbatim
48: *
49: * Arguments:
50: * ==========
51: *
52: *> \param[out] K
53: *> \verbatim
54: *> K is INTEGER
55: *> The number of non-deflated eigenvalues, and the order of the
56: *> related secular equation. 0 <= K <=N.
57: *> \endverbatim
58: *>
59: *> \param[in] N
60: *> \verbatim
61: *> N is INTEGER
62: *> The dimension of the symmetric tridiagonal matrix. N >= 0.
63: *> \endverbatim
64: *>
65: *> \param[in] N1
66: *> \verbatim
67: *> N1 is INTEGER
68: *> The location of the last eigenvalue in the leading sub-matrix.
69: *> min(1,N) <= N1 <= N/2.
70: *> \endverbatim
71: *>
72: *> \param[in,out] D
73: *> \verbatim
74: *> D is DOUBLE PRECISION array, dimension (N)
75: *> On entry, D contains the eigenvalues of the two submatrices to
76: *> be combined.
77: *> On exit, D contains the trailing (N-K) updated eigenvalues
78: *> (those which were deflated) sorted into increasing order.
79: *> \endverbatim
80: *>
81: *> \param[in,out] Q
82: *> \verbatim
83: *> Q is DOUBLE PRECISION array, dimension (LDQ, N)
84: *> On entry, Q contains the eigenvectors of two submatrices in
85: *> the two square blocks with corners at (1,1), (N1,N1)
86: *> and (N1+1, N1+1), (N,N).
87: *> On exit, Q contains the trailing (N-K) updated eigenvectors
88: *> (those which were deflated) in its last N-K columns.
89: *> \endverbatim
90: *>
91: *> \param[in] LDQ
92: *> \verbatim
93: *> LDQ is INTEGER
94: *> The leading dimension of the array Q. LDQ >= max(1,N).
95: *> \endverbatim
96: *>
97: *> \param[in,out] INDXQ
98: *> \verbatim
99: *> INDXQ is INTEGER array, dimension (N)
100: *> The permutation which separately sorts the two sub-problems
101: *> in D into ascending order. Note that elements in the second
102: *> half of this permutation must first have N1 added to their
103: *> values. Destroyed on exit.
104: *> \endverbatim
105: *>
106: *> \param[in,out] RHO
107: *> \verbatim
108: *> RHO is DOUBLE PRECISION
109: *> On entry, the off-diagonal element associated with the rank-1
110: *> cut which originally split the two submatrices which are now
111: *> being recombined.
112: *> On exit, RHO has been modified to the value required by
113: *> DLAED3.
114: *> \endverbatim
115: *>
116: *> \param[in] Z
117: *> \verbatim
118: *> Z is DOUBLE PRECISION array, dimension (N)
119: *> On entry, Z contains the updating vector (the last
120: *> row of the first sub-eigenvector matrix and the first row of
121: *> the second sub-eigenvector matrix).
122: *> On exit, the contents of Z have been destroyed by the updating
123: *> process.
124: *> \endverbatim
125: *>
126: *> \param[out] DLAMDA
127: *> \verbatim
128: *> DLAMDA is DOUBLE PRECISION array, dimension (N)
129: *> A copy of the first K eigenvalues which will be used by
130: *> DLAED3 to form the secular equation.
131: *> \endverbatim
132: *>
133: *> \param[out] W
134: *> \verbatim
135: *> W is DOUBLE PRECISION array, dimension (N)
136: *> The first k values of the final deflation-altered z-vector
137: *> which will be passed to DLAED3.
138: *> \endverbatim
139: *>
140: *> \param[out] Q2
141: *> \verbatim
142: *> Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
143: *> A copy of the first K eigenvectors which will be used by
144: *> DLAED3 in a matrix multiply (DGEMM) to solve for the new
145: *> eigenvectors.
146: *> \endverbatim
147: *>
148: *> \param[out] INDX
149: *> \verbatim
150: *> INDX is INTEGER array, dimension (N)
151: *> The permutation used to sort the contents of DLAMDA into
152: *> ascending order.
153: *> \endverbatim
154: *>
155: *> \param[out] INDXC
156: *> \verbatim
157: *> INDXC is INTEGER array, dimension (N)
158: *> The permutation used to arrange the columns of the deflated
159: *> Q matrix into three groups: the first group contains non-zero
160: *> elements only at and above N1, the second contains
161: *> non-zero elements only below N1, and the third is dense.
162: *> \endverbatim
163: *>
164: *> \param[out] INDXP
165: *> \verbatim
166: *> INDXP is INTEGER array, dimension (N)
167: *> The permutation used to place deflated values of D at the end
168: *> of the array. INDXP(1:K) points to the nondeflated D-values
169: *> and INDXP(K+1:N) points to the deflated eigenvalues.
170: *> \endverbatim
171: *>
172: *> \param[out] COLTYP
173: *> \verbatim
174: *> COLTYP is INTEGER array, dimension (N)
175: *> During execution, a label which will indicate which of the
176: *> following types a column in the Q2 matrix is:
177: *> 1 : non-zero in the upper half only;
178: *> 2 : dense;
179: *> 3 : non-zero in the lower half only;
180: *> 4 : deflated.
181: *> On exit, COLTYP(i) is the number of columns of type i,
182: *> for i=1 to 4 only.
183: *> \endverbatim
184: *>
185: *> \param[out] INFO
186: *> \verbatim
187: *> INFO is INTEGER
188: *> = 0: successful exit.
189: *> < 0: if INFO = -i, the i-th argument had an illegal value.
190: *> \endverbatim
191: *
192: * Authors:
193: * ========
194: *
195: *> \author Univ. of Tennessee
196: *> \author Univ. of California Berkeley
197: *> \author Univ. of Colorado Denver
198: *> \author NAG Ltd.
199: *
200: *> \date November 2011
201: *
202: *> \ingroup auxOTHERcomputational
203: *
204: *> \par Contributors:
205: * ==================
206: *>
207: *> Jeff Rutter, Computer Science Division, University of California
208: *> at Berkeley, USA \n
209: *> Modified by Francoise Tisseur, University of Tennessee
210: *>
211: * =====================================================================
212: SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
213: $ Q2, INDX, INDXC, INDXP, COLTYP, INFO )
214: *
215: * -- LAPACK computational routine (version 3.4.0) --
216: * -- LAPACK is a software package provided by Univ. of Tennessee, --
217: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218: * November 2011
219: *
220: * .. Scalar Arguments ..
221: INTEGER INFO, K, LDQ, N, N1
222: DOUBLE PRECISION RHO
223: * ..
224: * .. Array Arguments ..
225: INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
226: $ INDXQ( * )
227: DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
228: $ W( * ), Z( * )
229: * ..
230: *
231: * =====================================================================
232: *
233: * .. Parameters ..
234: DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT
235: PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
236: $ TWO = 2.0D0, EIGHT = 8.0D0 )
237: * ..
238: * .. Local Arrays ..
239: INTEGER CTOT( 4 ), PSM( 4 )
240: * ..
241: * .. Local Scalars ..
242: INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
243: $ N2, NJ, PJ
244: DOUBLE PRECISION C, EPS, S, T, TAU, TOL
245: * ..
246: * .. External Functions ..
247: INTEGER IDAMAX
248: DOUBLE PRECISION DLAMCH, DLAPY2
249: EXTERNAL IDAMAX, DLAMCH, DLAPY2
250: * ..
251: * .. External Subroutines ..
252: EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
253: * ..
254: * .. Intrinsic Functions ..
255: INTRINSIC ABS, MAX, MIN, SQRT
256: * ..
257: * .. Executable Statements ..
258: *
259: * Test the input parameters.
260: *
261: INFO = 0
262: *
263: IF( N.LT.0 ) THEN
264: INFO = -2
265: ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
266: INFO = -6
267: ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
268: INFO = -3
269: END IF
270: IF( INFO.NE.0 ) THEN
271: CALL XERBLA( 'DLAED2', -INFO )
272: RETURN
273: END IF
274: *
275: * Quick return if possible
276: *
277: IF( N.EQ.0 )
278: $ RETURN
279: *
280: N2 = N - N1
281: N1P1 = N1 + 1
282: *
283: IF( RHO.LT.ZERO ) THEN
284: CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
285: END IF
286: *
287: * Normalize z so that norm(z) = 1. Since z is the concatenation of
288: * two normalized vectors, norm2(z) = sqrt(2).
289: *
290: T = ONE / SQRT( TWO )
291: CALL DSCAL( N, T, Z, 1 )
292: *
293: * RHO = ABS( norm(z)**2 * RHO )
294: *
295: RHO = ABS( TWO*RHO )
296: *
297: * Sort the eigenvalues into increasing order
298: *
299: DO 10 I = N1P1, N
300: INDXQ( I ) = INDXQ( I ) + N1
301: 10 CONTINUE
302: *
303: * re-integrate the deflated parts from the last pass
304: *
305: DO 20 I = 1, N
306: DLAMDA( I ) = D( INDXQ( I ) )
307: 20 CONTINUE
308: CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
309: DO 30 I = 1, N
310: INDX( I ) = INDXQ( INDXC( I ) )
311: 30 CONTINUE
312: *
313: * Calculate the allowable deflation tolerance
314: *
315: IMAX = IDAMAX( N, Z, 1 )
316: JMAX = IDAMAX( N, D, 1 )
317: EPS = DLAMCH( 'Epsilon' )
318: TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
319: *
320: * If the rank-1 modifier is small enough, no more needs to be done
321: * except to reorganize Q so that its columns correspond with the
322: * elements in D.
323: *
324: IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
325: K = 0
326: IQ2 = 1
327: DO 40 J = 1, N
328: I = INDX( J )
329: CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
330: DLAMDA( J ) = D( I )
331: IQ2 = IQ2 + N
332: 40 CONTINUE
333: CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
334: CALL DCOPY( N, DLAMDA, 1, D, 1 )
335: GO TO 190
336: END IF
337: *
338: * If there are multiple eigenvalues then the problem deflates. Here
339: * the number of equal eigenvalues are found. As each equal
340: * eigenvalue is found, an elementary reflector is computed to rotate
341: * the corresponding eigensubspace so that the corresponding
342: * components of Z are zero in this new basis.
343: *
344: DO 50 I = 1, N1
345: COLTYP( I ) = 1
346: 50 CONTINUE
347: DO 60 I = N1P1, N
348: COLTYP( I ) = 3
349: 60 CONTINUE
350: *
351: *
352: K = 0
353: K2 = N + 1
354: DO 70 J = 1, N
355: NJ = INDX( J )
356: IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
357: *
358: * Deflate due to small z component.
359: *
360: K2 = K2 - 1
361: COLTYP( NJ ) = 4
362: INDXP( K2 ) = NJ
363: IF( J.EQ.N )
364: $ GO TO 100
365: ELSE
366: PJ = NJ
367: GO TO 80
368: END IF
369: 70 CONTINUE
370: 80 CONTINUE
371: J = J + 1
372: NJ = INDX( J )
373: IF( J.GT.N )
374: $ GO TO 100
375: IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
376: *
377: * Deflate due to small z component.
378: *
379: K2 = K2 - 1
380: COLTYP( NJ ) = 4
381: INDXP( K2 ) = NJ
382: ELSE
383: *
384: * Check if eigenvalues are close enough to allow deflation.
385: *
386: S = Z( PJ )
387: C = Z( NJ )
388: *
389: * Find sqrt(a**2+b**2) without overflow or
390: * destructive underflow.
391: *
392: TAU = DLAPY2( C, S )
393: T = D( NJ ) - D( PJ )
394: C = C / TAU
395: S = -S / TAU
396: IF( ABS( T*C*S ).LE.TOL ) THEN
397: *
398: * Deflation is possible.
399: *
400: Z( NJ ) = TAU
401: Z( PJ ) = ZERO
402: IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
403: $ COLTYP( NJ ) = 2
404: COLTYP( PJ ) = 4
405: CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
406: T = D( PJ )*C**2 + D( NJ )*S**2
407: D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
408: D( PJ ) = T
409: K2 = K2 - 1
410: I = 1
411: 90 CONTINUE
412: IF( K2+I.LE.N ) THEN
413: IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
414: INDXP( K2+I-1 ) = INDXP( K2+I )
415: INDXP( K2+I ) = PJ
416: I = I + 1
417: GO TO 90
418: ELSE
419: INDXP( K2+I-1 ) = PJ
420: END IF
421: ELSE
422: INDXP( K2+I-1 ) = PJ
423: END IF
424: PJ = NJ
425: ELSE
426: K = K + 1
427: DLAMDA( K ) = D( PJ )
428: W( K ) = Z( PJ )
429: INDXP( K ) = PJ
430: PJ = NJ
431: END IF
432: END IF
433: GO TO 80
434: 100 CONTINUE
435: *
436: * Record the last eigenvalue.
437: *
438: K = K + 1
439: DLAMDA( K ) = D( PJ )
440: W( K ) = Z( PJ )
441: INDXP( K ) = PJ
442: *
443: * Count up the total number of the various types of columns, then
444: * form a permutation which positions the four column types into
445: * four uniform groups (although one or more of these groups may be
446: * empty).
447: *
448: DO 110 J = 1, 4
449: CTOT( J ) = 0
450: 110 CONTINUE
451: DO 120 J = 1, N
452: CT = COLTYP( J )
453: CTOT( CT ) = CTOT( CT ) + 1
454: 120 CONTINUE
455: *
456: * PSM(*) = Position in SubMatrix (of types 1 through 4)
457: *
458: PSM( 1 ) = 1
459: PSM( 2 ) = 1 + CTOT( 1 )
460: PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
461: PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
462: K = N - CTOT( 4 )
463: *
464: * Fill out the INDXC array so that the permutation which it induces
465: * will place all type-1 columns first, all type-2 columns next,
466: * then all type-3's, and finally all type-4's.
467: *
468: DO 130 J = 1, N
469: JS = INDXP( J )
470: CT = COLTYP( JS )
471: INDX( PSM( CT ) ) = JS
472: INDXC( PSM( CT ) ) = J
473: PSM( CT ) = PSM( CT ) + 1
474: 130 CONTINUE
475: *
476: * Sort the eigenvalues and corresponding eigenvectors into DLAMDA
477: * and Q2 respectively. The eigenvalues/vectors which were not
478: * deflated go into the first K slots of DLAMDA and Q2 respectively,
479: * while those which were deflated go into the last N - K slots.
480: *
481: I = 1
482: IQ1 = 1
483: IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
484: DO 140 J = 1, CTOT( 1 )
485: JS = INDX( I )
486: CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
487: Z( I ) = D( JS )
488: I = I + 1
489: IQ1 = IQ1 + N1
490: 140 CONTINUE
491: *
492: DO 150 J = 1, CTOT( 2 )
493: JS = INDX( I )
494: CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
495: CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
496: Z( I ) = D( JS )
497: I = I + 1
498: IQ1 = IQ1 + N1
499: IQ2 = IQ2 + N2
500: 150 CONTINUE
501: *
502: DO 160 J = 1, CTOT( 3 )
503: JS = INDX( I )
504: CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
505: Z( I ) = D( JS )
506: I = I + 1
507: IQ2 = IQ2 + N2
508: 160 CONTINUE
509: *
510: IQ1 = IQ2
511: DO 170 J = 1, CTOT( 4 )
512: JS = INDX( I )
513: CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
514: IQ2 = IQ2 + N
515: Z( I ) = D( JS )
516: I = I + 1
517: 170 CONTINUE
518: *
519: * The deflated eigenvalues and their corresponding vectors go back
520: * into the last N - K slots of D and Q respectively.
521: *
522: IF( K.LT.N ) THEN
523: CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
524: $ Q( 1, K+1 ), LDQ )
525: CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
526: END IF
527: *
528: * Copy CTOT into COLTYP for referencing in DLAED3.
529: *
530: DO 180 J = 1, 4
531: COLTYP( J ) = CTOT( J )
532: 180 CONTINUE
533: *
534: 190 CONTINUE
535: RETURN
536: *
537: * End of DLAED2
538: *
539: END
CVSweb interface <joel.bertrand@systella.fr>