1: SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
2: $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
3: $ IDXC, IDXQ, COLTYP, INFO )
4: *
5: * -- LAPACK auxiliary routine (version 3.2) --
6: * -- LAPACK is a software package provided by Univ. of Tennessee, --
7: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
8: * November 2006
9: *
10: * .. Scalar Arguments ..
11: INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
12: DOUBLE PRECISION ALPHA, BETA
13: * ..
14: * .. Array Arguments ..
15: INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
16: $ IDXQ( * )
17: DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ),
18: $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
19: $ Z( * )
20: * ..
21: *
22: * Purpose
23: * =======
24: *
25: * DLASD2 merges the two sets of singular values together into a single
26: * sorted set. Then it tries to deflate the size of the problem.
27: * There are two ways in which deflation can occur: when two or more
28: * singular values are close together or if there is a tiny entry in the
29: * Z vector. For each such occurrence the order of the related secular
30: * equation problem is reduced by one.
31: *
32: * DLASD2 is called from DLASD1.
33: *
34: * Arguments
35: * =========
36: *
37: * NL (input) INTEGER
38: * The row dimension of the upper block. NL >= 1.
39: *
40: * NR (input) INTEGER
41: * The row dimension of the lower block. NR >= 1.
42: *
43: * SQRE (input) INTEGER
44: * = 0: the lower block is an NR-by-NR square matrix.
45: * = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
46: *
47: * The bidiagonal matrix has N = NL + NR + 1 rows and
48: * M = N + SQRE >= N columns.
49: *
50: * K (output) INTEGER
51: * Contains the dimension of the non-deflated matrix,
52: * This is the order of the related secular equation. 1 <= K <=N.
53: *
54: * D (input/output) DOUBLE PRECISION array, dimension(N)
55: * On entry D contains the singular values of the two submatrices
56: * to be combined. On exit D contains the trailing (N-K) updated
57: * singular values (those which were deflated) sorted into
58: * increasing order.
59: *
60: * Z (output) DOUBLE PRECISION array, dimension(N)
61: * On exit Z contains the updating row vector in the secular
62: * equation.
63: *
64: * ALPHA (input) DOUBLE PRECISION
65: * Contains the diagonal element associated with the added row.
66: *
67: * BETA (input) DOUBLE PRECISION
68: * Contains the off-diagonal element associated with the added
69: * row.
70: *
71: * U (input/output) DOUBLE PRECISION array, dimension(LDU,N)
72: * On entry U contains the left singular vectors of two
73: * submatrices in the two square blocks with corners at (1,1),
74: * (NL, NL), and (NL+2, NL+2), (N,N).
75: * On exit U contains the trailing (N-K) updated left singular
76: * vectors (those which were deflated) in its last N-K columns.
77: *
78: * LDU (input) INTEGER
79: * The leading dimension of the array U. LDU >= N.
80: *
81: * VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
82: * On entry VT' contains the right singular vectors of two
83: * submatrices in the two square blocks with corners at (1,1),
84: * (NL+1, NL+1), and (NL+2, NL+2), (M,M).
85: * On exit VT' contains the trailing (N-K) updated right singular
86: * vectors (those which were deflated) in its last N-K columns.
87: * In case SQRE =1, the last row of VT spans the right null
88: * space.
89: *
90: * LDVT (input) INTEGER
91: * The leading dimension of the array VT. LDVT >= M.
92: *
93: * DSIGMA (output) DOUBLE PRECISION array, dimension (N)
94: * Contains a copy of the diagonal elements (K-1 singular values
95: * and one zero) in the secular equation.
96: *
97: * U2 (output) DOUBLE PRECISION array, dimension(LDU2,N)
98: * Contains a copy of the first K-1 left singular vectors which
99: * will be used by DLASD3 in a matrix multiply (DGEMM) to solve
100: * for the new left singular vectors. U2 is arranged into four
101: * blocks. The first block contains a column with 1 at NL+1 and
102: * zero everywhere else; the second block contains non-zero
103: * entries only at and above NL; the third contains non-zero
104: * entries only below NL+1; and the fourth is dense.
105: *
106: * LDU2 (input) INTEGER
107: * The leading dimension of the array U2. LDU2 >= N.
108: *
109: * VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N)
110: * VT2' contains a copy of the first K right singular vectors
111: * which will be used by DLASD3 in a matrix multiply (DGEMM) to
112: * solve for the new right singular vectors. VT2 is arranged into
113: * three blocks. The first block contains a row that corresponds
114: * to the special 0 diagonal element in SIGMA; the second block
115: * contains non-zeros only at and before NL +1; the third block
116: * contains non-zeros only at and after NL +2.
117: *
118: * LDVT2 (input) INTEGER
119: * The leading dimension of the array VT2. LDVT2 >= M.
120: *
121: * IDXP (workspace) INTEGER array dimension(N)
122: * This will contain the permutation used to place deflated
123: * values of D at the end of the array. On output IDXP(2:K)
124: * points to the nondeflated D-values and IDXP(K+1:N)
125: * points to the deflated singular values.
126: *
127: * IDX (workspace) INTEGER array dimension(N)
128: * This will contain the permutation used to sort the contents of
129: * D into ascending order.
130: *
131: * IDXC (output) INTEGER array dimension(N)
132: * This will contain the permutation used to arrange the columns
133: * of the deflated U matrix into three groups: the first group
134: * contains non-zero entries only at and above NL, the second
135: * contains non-zero entries only below NL+2, and the third is
136: * dense.
137: *
138: * IDXQ (input/output) INTEGER array dimension(N)
139: * This contains the permutation which separately sorts the two
140: * sub-problems in D into ascending order. Note that entries in
141: * the first hlaf of this permutation must first be moved one
142: * position backward; and entries in the second half
143: * must first have NL+1 added to their values.
144: *
145: * COLTYP (workspace/output) INTEGER array dimension(N)
146: * As workspace, this will contain a label which will indicate
147: * which of the following types a column in the U2 matrix or a
148: * row in the VT2 matrix is:
149: * 1 : non-zero in the upper half only
150: * 2 : non-zero in the lower half only
151: * 3 : dense
152: * 4 : deflated
153: *
154: * On exit, it is an array of dimension 4, with COLTYP(I) being
155: * the dimension of the I-th type columns.
156: *
157: * INFO (output) INTEGER
158: * = 0: successful exit.
159: * < 0: if INFO = -i, the i-th argument had an illegal value.
160: *
161: * Further Details
162: * ===============
163: *
164: * Based on contributions by
165: * Ming Gu and Huan Ren, Computer Science Division, University of
166: * California at Berkeley, USA
167: *
168: * =====================================================================
169: *
170: * .. Parameters ..
171: DOUBLE PRECISION ZERO, ONE, TWO, EIGHT
172: PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
173: $ EIGHT = 8.0D+0 )
174: * ..
175: * .. Local Arrays ..
176: INTEGER CTOT( 4 ), PSM( 4 )
177: * ..
178: * .. Local Scalars ..
179: INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
180: $ N, NLP1, NLP2
181: DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1
182: * ..
183: * .. External Functions ..
184: DOUBLE PRECISION DLAMCH, DLAPY2
185: EXTERNAL DLAMCH, DLAPY2
186: * ..
187: * .. External Subroutines ..
188: EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
189: * ..
190: * .. Intrinsic Functions ..
191: INTRINSIC ABS, MAX
192: * ..
193: * .. Executable Statements ..
194: *
195: * Test the input parameters.
196: *
197: INFO = 0
198: *
199: IF( NL.LT.1 ) THEN
200: INFO = -1
201: ELSE IF( NR.LT.1 ) THEN
202: INFO = -2
203: ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
204: INFO = -3
205: END IF
206: *
207: N = NL + NR + 1
208: M = N + SQRE
209: *
210: IF( LDU.LT.N ) THEN
211: INFO = -10
212: ELSE IF( LDVT.LT.M ) THEN
213: INFO = -12
214: ELSE IF( LDU2.LT.N ) THEN
215: INFO = -15
216: ELSE IF( LDVT2.LT.M ) THEN
217: INFO = -17
218: END IF
219: IF( INFO.NE.0 ) THEN
220: CALL XERBLA( 'DLASD2', -INFO )
221: RETURN
222: END IF
223: *
224: NLP1 = NL + 1
225: NLP2 = NL + 2
226: *
227: * Generate the first part of the vector Z; and move the singular
228: * values in the first part of D one position backward.
229: *
230: Z1 = ALPHA*VT( NLP1, NLP1 )
231: Z( 1 ) = Z1
232: DO 10 I = NL, 1, -1
233: Z( I+1 ) = ALPHA*VT( I, NLP1 )
234: D( I+1 ) = D( I )
235: IDXQ( I+1 ) = IDXQ( I ) + 1
236: 10 CONTINUE
237: *
238: * Generate the second part of the vector Z.
239: *
240: DO 20 I = NLP2, M
241: Z( I ) = BETA*VT( I, NLP2 )
242: 20 CONTINUE
243: *
244: * Initialize some reference arrays.
245: *
246: DO 30 I = 2, NLP1
247: COLTYP( I ) = 1
248: 30 CONTINUE
249: DO 40 I = NLP2, N
250: COLTYP( I ) = 2
251: 40 CONTINUE
252: *
253: * Sort the singular values into increasing order
254: *
255: DO 50 I = NLP2, N
256: IDXQ( I ) = IDXQ( I ) + NLP1
257: 50 CONTINUE
258: *
259: * DSIGMA, IDXC, IDXC, and the first column of U2
260: * are used as storage space.
261: *
262: DO 60 I = 2, N
263: DSIGMA( I ) = D( IDXQ( I ) )
264: U2( I, 1 ) = Z( IDXQ( I ) )
265: IDXC( I ) = COLTYP( IDXQ( I ) )
266: 60 CONTINUE
267: *
268: CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
269: *
270: DO 70 I = 2, N
271: IDXI = 1 + IDX( I )
272: D( I ) = DSIGMA( IDXI )
273: Z( I ) = U2( IDXI, 1 )
274: COLTYP( I ) = IDXC( IDXI )
275: 70 CONTINUE
276: *
277: * Calculate the allowable deflation tolerance
278: *
279: EPS = DLAMCH( 'Epsilon' )
280: TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
281: TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
282: *
283: * There are 2 kinds of deflation -- first a value in the z-vector
284: * is small, second two (or more) singular values are very close
285: * together (their difference is small).
286: *
287: * If the value in the z-vector is small, we simply permute the
288: * array so that the corresponding singular value is moved to the
289: * end.
290: *
291: * If two values in the D-vector are close, we perform a two-sided
292: * rotation designed to make one of the corresponding z-vector
293: * entries zero, and then permute the array so that the deflated
294: * singular value is moved to the end.
295: *
296: * If there are multiple singular values then the problem deflates.
297: * Here the number of equal singular values are found. As each equal
298: * singular value is found, an elementary reflector is computed to
299: * rotate the corresponding singular subspace so that the
300: * corresponding components of Z are zero in this new basis.
301: *
302: K = 1
303: K2 = N + 1
304: DO 80 J = 2, N
305: IF( ABS( Z( J ) ).LE.TOL ) THEN
306: *
307: * Deflate due to small z component.
308: *
309: K2 = K2 - 1
310: IDXP( K2 ) = J
311: COLTYP( J ) = 4
312: IF( J.EQ.N )
313: $ GO TO 120
314: ELSE
315: JPREV = J
316: GO TO 90
317: END IF
318: 80 CONTINUE
319: 90 CONTINUE
320: J = JPREV
321: 100 CONTINUE
322: J = J + 1
323: IF( J.GT.N )
324: $ GO TO 110
325: IF( ABS( Z( J ) ).LE.TOL ) THEN
326: *
327: * Deflate due to small z component.
328: *
329: K2 = K2 - 1
330: IDXP( K2 ) = J
331: COLTYP( J ) = 4
332: ELSE
333: *
334: * Check if singular values are close enough to allow deflation.
335: *
336: IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
337: *
338: * Deflation is possible.
339: *
340: S = Z( JPREV )
341: C = Z( J )
342: *
343: * Find sqrt(a**2+b**2) without overflow or
344: * destructive underflow.
345: *
346: TAU = DLAPY2( C, S )
347: C = C / TAU
348: S = -S / TAU
349: Z( J ) = TAU
350: Z( JPREV ) = ZERO
351: *
352: * Apply back the Givens rotation to the left and right
353: * singular vector matrices.
354: *
355: IDXJP = IDXQ( IDX( JPREV )+1 )
356: IDXJ = IDXQ( IDX( J )+1 )
357: IF( IDXJP.LE.NLP1 ) THEN
358: IDXJP = IDXJP - 1
359: END IF
360: IF( IDXJ.LE.NLP1 ) THEN
361: IDXJ = IDXJ - 1
362: END IF
363: CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
364: CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
365: $ S )
366: IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
367: COLTYP( J ) = 3
368: END IF
369: COLTYP( JPREV ) = 4
370: K2 = K2 - 1
371: IDXP( K2 ) = JPREV
372: JPREV = J
373: ELSE
374: K = K + 1
375: U2( K, 1 ) = Z( JPREV )
376: DSIGMA( K ) = D( JPREV )
377: IDXP( K ) = JPREV
378: JPREV = J
379: END IF
380: END IF
381: GO TO 100
382: 110 CONTINUE
383: *
384: * Record the last singular value.
385: *
386: K = K + 1
387: U2( K, 1 ) = Z( JPREV )
388: DSIGMA( K ) = D( JPREV )
389: IDXP( K ) = JPREV
390: *
391: 120 CONTINUE
392: *
393: * Count up the total number of the various types of columns, then
394: * form a permutation which positions the four column types into
395: * four groups of uniform structure (although one or more of these
396: * groups may be empty).
397: *
398: DO 130 J = 1, 4
399: CTOT( J ) = 0
400: 130 CONTINUE
401: DO 140 J = 2, N
402: CT = COLTYP( J )
403: CTOT( CT ) = CTOT( CT ) + 1
404: 140 CONTINUE
405: *
406: * PSM(*) = Position in SubMatrix (of types 1 through 4)
407: *
408: PSM( 1 ) = 2
409: PSM( 2 ) = 2 + CTOT( 1 )
410: PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
411: PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
412: *
413: * Fill out the IDXC array so that the permutation which it induces
414: * will place all type-1 columns first, all type-2 columns next,
415: * then all type-3's, and finally all type-4's, starting from the
416: * second column. This applies similarly to the rows of VT.
417: *
418: DO 150 J = 2, N
419: JP = IDXP( J )
420: CT = COLTYP( JP )
421: IDXC( PSM( CT ) ) = J
422: PSM( CT ) = PSM( CT ) + 1
423: 150 CONTINUE
424: *
425: * Sort the singular values and corresponding singular vectors into
426: * DSIGMA, U2, and VT2 respectively. The singular values/vectors
427: * which were not deflated go into the first K slots of DSIGMA, U2,
428: * and VT2 respectively, while those which were deflated go into the
429: * last N - K slots, except that the first column/row will be treated
430: * separately.
431: *
432: DO 160 J = 2, N
433: JP = IDXP( J )
434: DSIGMA( J ) = D( JP )
435: IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
436: IF( IDXJ.LE.NLP1 ) THEN
437: IDXJ = IDXJ - 1
438: END IF
439: CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
440: CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
441: 160 CONTINUE
442: *
443: * Determine DSIGMA(1), DSIGMA(2) and Z(1)
444: *
445: DSIGMA( 1 ) = ZERO
446: HLFTOL = TOL / TWO
447: IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
448: $ DSIGMA( 2 ) = HLFTOL
449: IF( M.GT.N ) THEN
450: Z( 1 ) = DLAPY2( Z1, Z( M ) )
451: IF( Z( 1 ).LE.TOL ) THEN
452: C = ONE
453: S = ZERO
454: Z( 1 ) = TOL
455: ELSE
456: C = Z1 / Z( 1 )
457: S = Z( M ) / Z( 1 )
458: END IF
459: ELSE
460: IF( ABS( Z1 ).LE.TOL ) THEN
461: Z( 1 ) = TOL
462: ELSE
463: Z( 1 ) = Z1
464: END IF
465: END IF
466: *
467: * Move the rest of the updating row to Z.
468: *
469: CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
470: *
471: * Determine the first column of U2, the first row of VT2 and the
472: * last row of VT.
473: *
474: CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
475: U2( NLP1, 1 ) = ONE
476: IF( M.GT.N ) THEN
477: DO 170 I = 1, NLP1
478: VT( M, I ) = -S*VT( NLP1, I )
479: VT2( 1, I ) = C*VT( NLP1, I )
480: 170 CONTINUE
481: DO 180 I = NLP2, M
482: VT2( 1, I ) = S*VT( M, I )
483: VT( M, I ) = C*VT( M, I )
484: 180 CONTINUE
485: ELSE
486: CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
487: END IF
488: IF( M.GT.N ) THEN
489: CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
490: END IF
491: *
492: * The deflated singular values and their corresponding vectors go
493: * into the back of D, U, and V respectively.
494: *
495: IF( N.GT.K ) THEN
496: CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
497: CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
498: $ LDU )
499: CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
500: $ LDVT )
501: END IF
502: *
503: * Copy CTOT into COLTYP for referencing in DLASD3.
504: *
505: DO 190 J = 1, 4
506: COLTYP( J ) = CTOT( J )
507: 190 CONTINUE
508: *
509: RETURN
510: *
511: * End of DLASD2
512: *
513: END
CVSweb interface <joel.bertrand@systella.fr>