1: SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
2: $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
3: $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
4: $ B22D, B22E, WORK, LWORK, INFO )
5: IMPLICIT NONE
6: *
7: * -- LAPACK routine (version 3.3.0) --
8: *
9: * -- Contributed by Brian Sutton of the Randolph-Macon College --
10: * -- November 2010
11: *
12: * -- LAPACK is a software package provided by Univ. of Tennessee, --
13: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
14: *
15: * .. Scalar Arguments ..
16: CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
17: INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q
18: * ..
19: * .. Array Arguments ..
20: DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ),
21: $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
22: $ PHI( * ), THETA( * ), WORK( * )
23: DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
24: $ V2T( LDV2T, * )
25: * ..
26: *
27: * Purpose
28: * =======
29: *
30: * DBBCSD computes the CS decomposition of an orthogonal matrix in
31: * bidiagonal-block form,
32: *
33: *
34: * [ B11 | B12 0 0 ]
35: * [ 0 | 0 -I 0 ]
36: * X = [----------------]
37: * [ B21 | B22 0 0 ]
38: * [ 0 | 0 0 I ]
39: *
40: * [ C | -S 0 0 ]
41: * [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T
42: * = [---------] [---------------] [---------] .
43: * [ | U2 ] [ S | C 0 0 ] [ | V2 ]
44: * [ 0 | 0 0 I ]
45: *
46: * X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
47: * than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
48: * transposed and/or permuted. This can be done in constant time using
49: * the TRANS and SIGNS options. See DORCSD for details.)
50: *
51: * The bidiagonal matrices B11, B12, B21, and B22 are represented
52: * implicitly by angles THETA(1:Q) and PHI(1:Q-1).
53: *
54: * The orthogonal matrices U1, U2, V1T, and V2T are input/output.
55: * The input matrices are pre- or post-multiplied by the appropriate
56: * singular vector matrices.
57: *
58: * Arguments
59: * =========
60: *
61: * JOBU1 (input) CHARACTER
62: * = 'Y': U1 is updated;
63: * otherwise: U1 is not updated.
64: *
65: * JOBU2 (input) CHARACTER
66: * = 'Y': U2 is updated;
67: * otherwise: U2 is not updated.
68: *
69: * JOBV1T (input) CHARACTER
70: * = 'Y': V1T is updated;
71: * otherwise: V1T is not updated.
72: *
73: * JOBV2T (input) CHARACTER
74: * = 'Y': V2T is updated;
75: * otherwise: V2T is not updated.
76: *
77: * TRANS (input) CHARACTER
78: * = 'T': X, U1, U2, V1T, and V2T are stored in row-major
79: * order;
80: * otherwise: X, U1, U2, V1T, and V2T are stored in column-
81: * major order.
82: *
83: * M (input) INTEGER
84: * The number of rows and columns in X, the orthogonal matrix in
85: * bidiagonal-block form.
86: *
87: * P (input) INTEGER
88: * The number of rows in the top-left block of X. 0 <= P <= M.
89: *
90: * Q (input) INTEGER
91: * The number of columns in the top-left block of X.
92: * 0 <= Q <= MIN(P,M-P,M-Q).
93: *
94: * THETA (input/output) DOUBLE PRECISION array, dimension (Q)
95: * On entry, the angles THETA(1),...,THETA(Q) that, along with
96: * PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
97: * form. On exit, the angles whose cosines and sines define the
98: * diagonal blocks in the CS decomposition.
99: *
100: * PHI (input/workspace) DOUBLE PRECISION array, dimension (Q-1)
101: * The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
102: * THETA(Q), define the matrix in bidiagonal-block form.
103: *
104: * U1 (input/output) DOUBLE PRECISION array, dimension (LDU1,P)
105: * On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
106: * by the left singular vector matrix common to [ B11 ; 0 ] and
107: * [ B12 0 0 ; 0 -I 0 0 ].
108: *
109: * LDU1 (input) INTEGER
110: * The leading dimension of the array U1.
111: *
112: * U2 (input/output) DOUBLE PRECISION array, dimension (LDU2,M-P)
113: * On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
114: * postmultiplied by the left singular vector matrix common to
115: * [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
116: *
117: * LDU2 (input) INTEGER
118: * The leading dimension of the array U2.
119: *
120: * V1T (input/output) DOUBLE PRECISION array, dimension (LDV1T,Q)
121: * On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
122: * by the transpose of the right singular vector
123: * matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
124: *
125: * LDV1T (input) INTEGER
126: * The leading dimension of the array V1T.
127: *
128: * V2T (input/output) DOUBLE PRECISION array, dimenison (LDV2T,M-Q)
129: * On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
130: * premultiplied by the transpose of the right
131: * singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
132: * [ B22 0 0 ; 0 0 I ].
133: *
134: * LDV2T (input) INTEGER
135: * The leading dimension of the array V2T.
136: *
137: * B11D (output) DOUBLE PRECISION array, dimension (Q)
138: * When DBBCSD converges, B11D contains the cosines of THETA(1),
139: * ..., THETA(Q). If DBBCSD fails to converge, then B11D
140: * contains the diagonal of the partially reduced top-left
141: * block.
142: *
143: * B11E (output) DOUBLE PRECISION array, dimension (Q-1)
144: * When DBBCSD converges, B11E contains zeros. If DBBCSD fails
145: * to converge, then B11E contains the superdiagonal of the
146: * partially reduced top-left block.
147: *
148: * B12D (output) DOUBLE PRECISION array, dimension (Q)
149: * When DBBCSD converges, B12D contains the negative sines of
150: * THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then
151: * B12D contains the diagonal of the partially reduced top-right
152: * block.
153: *
154: * B12E (output) DOUBLE PRECISION array, dimension (Q-1)
155: * When DBBCSD converges, B12E contains zeros. If DBBCSD fails
156: * to converge, then B12E contains the subdiagonal of the
157: * partially reduced top-right block.
158: *
159: * WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
160: * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
161: *
162: * LWORK (input) INTEGER
163: * The dimension of the array WORK. LWORK >= MAX(1,8*Q).
164: *
165: * If LWORK = -1, then a workspace query is assumed; the
166: * routine only calculates the optimal size of the WORK array,
167: * returns this value as the first entry of the work array, and
168: * no error message related to LWORK is issued by XERBLA.
169: *
170: * INFO (output) INTEGER
171: * = 0: successful exit.
172: * < 0: if INFO = -i, the i-th argument had an illegal value.
173: * > 0: if DBBCSD did not converge, INFO specifies the number
174: * of nonzero entries in PHI, and B11D, B11E, etc.,
175: * contain the partially reduced matrix.
176: *
177: * Reference
178: * =========
179: *
180: * [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
181: * Algorithms, 50(1):33-65, 2009.
182: *
183: * Internal Parameters
184: * ===================
185: *
186: * TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
187: * TOLMUL controls the convergence criterion of the QR loop.
188: * Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
189: * are within TOLMUL*EPS of either bound.
190: *
191: * ===================================================================
192: *
193: * .. Parameters ..
194: INTEGER MAXITR
195: PARAMETER ( MAXITR = 6 )
196: DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
197: PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
198: $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
199: $ TEN = 10.0D0, ZERO = 0.0D0 )
200: DOUBLE PRECISION NEGONECOMPLEX
201: PARAMETER ( NEGONECOMPLEX = -1.0D0 )
202: * ..
203: * .. Local Scalars ..
204: LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12,
205: $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
206: $ WANTV2T
207: INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
208: $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
209: $ LWORKMIN, LWORKOPT, MAXIT, MINI
210: DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
211: $ EPS, MU, NU, R, SIGMA11, SIGMA21,
212: $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
213: $ UNFL, X1, X2, Y1, Y2
214: *
215: * .. External Subroutines ..
216: EXTERNAL DLASR, DSCAL, DSWAP, DLARTGP, DLARTGS, DLAS2,
217: $ XERBLA
218: * ..
219: * .. External Functions ..
220: DOUBLE PRECISION DLAMCH
221: LOGICAL LSAME
222: EXTERNAL LSAME, DLAMCH
223: * ..
224: * .. Intrinsic Functions ..
225: INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT
226: * ..
227: * .. Executable Statements ..
228: *
229: * Test input arguments
230: *
231: INFO = 0
232: LQUERY = LWORK .EQ. -1
233: WANTU1 = LSAME( JOBU1, 'Y' )
234: WANTU2 = LSAME( JOBU2, 'Y' )
235: WANTV1T = LSAME( JOBV1T, 'Y' )
236: WANTV2T = LSAME( JOBV2T, 'Y' )
237: COLMAJOR = .NOT. LSAME( TRANS, 'T' )
238: *
239: IF( M .LT. 0 ) THEN
240: INFO = -6
241: ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
242: INFO = -7
243: ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
244: INFO = -8
245: ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN
246: INFO = -8
247: ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
248: INFO = -12
249: ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
250: INFO = -14
251: ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
252: INFO = -16
253: ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
254: INFO = -18
255: END IF
256: *
257: * Quick return if Q = 0
258: *
259: IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
260: LWORKMIN = 1
261: WORK(1) = LWORKMIN
262: RETURN
263: END IF
264: *
265: * Compute workspace
266: *
267: IF( INFO .EQ. 0 ) THEN
268: IU1CS = 1
269: IU1SN = IU1CS + Q
270: IU2CS = IU1SN + Q
271: IU2SN = IU2CS + Q
272: IV1TCS = IU2SN + Q
273: IV1TSN = IV1TCS + Q
274: IV2TCS = IV1TSN + Q
275: IV2TSN = IV2TCS + Q
276: LWORKOPT = IV2TSN + Q - 1
277: LWORKMIN = LWORKOPT
278: WORK(1) = LWORKOPT
279: IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN
280: INFO = -28
281: END IF
282: END IF
283: *
284: IF( INFO .NE. 0 ) THEN
285: CALL XERBLA( 'DBBCSD', -INFO )
286: RETURN
287: ELSE IF( LQUERY ) THEN
288: RETURN
289: END IF
290: *
291: * Get machine constants
292: *
293: EPS = DLAMCH( 'Epsilon' )
294: UNFL = DLAMCH( 'Safe minimum' )
295: TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
296: TOL = TOLMUL*EPS
297: THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
298: *
299: * Test for negligible sines or cosines
300: *
301: DO I = 1, Q
302: IF( THETA(I) .LT. THRESH ) THEN
303: THETA(I) = ZERO
304: ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
305: THETA(I) = PIOVER2
306: END IF
307: END DO
308: DO I = 1, Q-1
309: IF( PHI(I) .LT. THRESH ) THEN
310: PHI(I) = ZERO
311: ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
312: PHI(I) = PIOVER2
313: END IF
314: END DO
315: *
316: * Initial deflation
317: *
318: IMAX = Q
319: DO WHILE( ( IMAX .GT. 1 ) .AND. ( PHI(IMAX-1) .EQ. ZERO ) )
320: IMAX = IMAX - 1
321: END DO
322: IMIN = IMAX - 1
323: IF ( IMIN .GT. 1 ) THEN
324: DO WHILE( PHI(IMIN-1) .NE. ZERO )
325: IMIN = IMIN - 1
326: IF ( IMIN .LE. 1 ) EXIT
327: END DO
328: END IF
329: *
330: * Initialize iteration counter
331: *
332: MAXIT = MAXITR*Q*Q
333: ITER = 0
334: *
335: * Begin main iteration loop
336: *
337: DO WHILE( IMAX .GT. 1 )
338: *
339: * Compute the matrix entries
340: *
341: B11D(IMIN) = COS( THETA(IMIN) )
342: B21D(IMIN) = -SIN( THETA(IMIN) )
343: DO I = IMIN, IMAX - 1
344: B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
345: B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) )
346: B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
347: B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
348: B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
349: B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) )
350: B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
351: B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
352: END DO
353: B12D(IMAX) = SIN( THETA(IMAX) )
354: B22D(IMAX) = COS( THETA(IMAX) )
355: *
356: * Abort if not converging; otherwise, increment ITER
357: *
358: IF( ITER .GT. MAXIT ) THEN
359: INFO = 0
360: DO I = 1, Q
361: IF( PHI(I) .NE. ZERO )
362: $ INFO = INFO + 1
363: END DO
364: RETURN
365: END IF
366: *
367: ITER = ITER + IMAX - IMIN
368: *
369: * Compute shifts
370: *
371: THETAMAX = THETA(IMIN)
372: THETAMIN = THETA(IMIN)
373: DO I = IMIN+1, IMAX
374: IF( THETA(I) > THETAMAX )
375: $ THETAMAX = THETA(I)
376: IF( THETA(I) < THETAMIN )
377: $ THETAMIN = THETA(I)
378: END DO
379: *
380: IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
381: *
382: * Zero on diagonals of B11 and B22; induce deflation with a
383: * zero shift
384: *
385: MU = ZERO
386: NU = ONE
387: *
388: ELSE IF( THETAMIN .LT. THRESH ) THEN
389: *
390: * Zero on diagonals of B12 and B22; induce deflation with a
391: * zero shift
392: *
393: MU = ONE
394: NU = ZERO
395: *
396: ELSE
397: *
398: * Compute shifts for B11 and B21 and use the lesser
399: *
400: CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11,
401: $ DUMMY )
402: CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21,
403: $ DUMMY )
404: *
405: IF( SIGMA11 .LE. SIGMA21 ) THEN
406: MU = SIGMA11
407: NU = SQRT( ONE - MU**2 )
408: IF( MU .LT. THRESH ) THEN
409: MU = ZERO
410: NU = ONE
411: END IF
412: ELSE
413: NU = SIGMA21
414: MU = SQRT( 1.0 - NU**2 )
415: IF( NU .LT. THRESH ) THEN
416: MU = ONE
417: NU = ZERO
418: END IF
419: END IF
420: END IF
421: *
422: * Rotate to produce bulges in B11 and B21
423: *
424: IF( MU .LE. NU ) THEN
425: CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
426: $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
427: ELSE
428: CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
429: $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
430: END IF
431: *
432: TEMP = WORK(IV1TCS+IMIN-1)*B11D(IMIN) +
433: $ WORK(IV1TSN+IMIN-1)*B11E(IMIN)
434: B11E(IMIN) = WORK(IV1TCS+IMIN-1)*B11E(IMIN) -
435: $ WORK(IV1TSN+IMIN-1)*B11D(IMIN)
436: B11D(IMIN) = TEMP
437: B11BULGE = WORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
438: B11D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
439: TEMP = WORK(IV1TCS+IMIN-1)*B21D(IMIN) +
440: $ WORK(IV1TSN+IMIN-1)*B21E(IMIN)
441: B21E(IMIN) = WORK(IV1TCS+IMIN-1)*B21E(IMIN) -
442: $ WORK(IV1TSN+IMIN-1)*B21D(IMIN)
443: B21D(IMIN) = TEMP
444: B21BULGE = WORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
445: B21D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
446: *
447: * Compute THETA(IMIN)
448: *
449: THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ),
450: $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
451: *
452: * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
453: *
454: IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
455: CALL DLARTGP( B11BULGE, B11D(IMIN), WORK(IU1SN+IMIN-1),
456: $ WORK(IU1CS+IMIN-1), R )
457: ELSE IF( MU .LE. NU ) THEN
458: CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
459: $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
460: ELSE
461: CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
462: $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
463: END IF
464: IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
465: CALL DLARTGP( B21BULGE, B21D(IMIN), WORK(IU2SN+IMIN-1),
466: $ WORK(IU2CS+IMIN-1), R )
467: ELSE IF( NU .LT. MU ) THEN
468: CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
469: $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
470: ELSE
471: CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
472: $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
473: END IF
474: WORK(IU2CS+IMIN-1) = -WORK(IU2CS+IMIN-1)
475: WORK(IU2SN+IMIN-1) = -WORK(IU2SN+IMIN-1)
476: *
477: TEMP = WORK(IU1CS+IMIN-1)*B11E(IMIN) +
478: $ WORK(IU1SN+IMIN-1)*B11D(IMIN+1)
479: B11D(IMIN+1) = WORK(IU1CS+IMIN-1)*B11D(IMIN+1) -
480: $ WORK(IU1SN+IMIN-1)*B11E(IMIN)
481: B11E(IMIN) = TEMP
482: IF( IMAX .GT. IMIN+1 ) THEN
483: B11BULGE = WORK(IU1SN+IMIN-1)*B11E(IMIN+1)
484: B11E(IMIN+1) = WORK(IU1CS+IMIN-1)*B11E(IMIN+1)
485: END IF
486: TEMP = WORK(IU1CS+IMIN-1)*B12D(IMIN) +
487: $ WORK(IU1SN+IMIN-1)*B12E(IMIN)
488: B12E(IMIN) = WORK(IU1CS+IMIN-1)*B12E(IMIN) -
489: $ WORK(IU1SN+IMIN-1)*B12D(IMIN)
490: B12D(IMIN) = TEMP
491: B12BULGE = WORK(IU1SN+IMIN-1)*B12D(IMIN+1)
492: B12D(IMIN+1) = WORK(IU1CS+IMIN-1)*B12D(IMIN+1)
493: TEMP = WORK(IU2CS+IMIN-1)*B21E(IMIN) +
494: $ WORK(IU2SN+IMIN-1)*B21D(IMIN+1)
495: B21D(IMIN+1) = WORK(IU2CS+IMIN-1)*B21D(IMIN+1) -
496: $ WORK(IU2SN+IMIN-1)*B21E(IMIN)
497: B21E(IMIN) = TEMP
498: IF( IMAX .GT. IMIN+1 ) THEN
499: B21BULGE = WORK(IU2SN+IMIN-1)*B21E(IMIN+1)
500: B21E(IMIN+1) = WORK(IU2CS+IMIN-1)*B21E(IMIN+1)
501: END IF
502: TEMP = WORK(IU2CS+IMIN-1)*B22D(IMIN) +
503: $ WORK(IU2SN+IMIN-1)*B22E(IMIN)
504: B22E(IMIN) = WORK(IU2CS+IMIN-1)*B22E(IMIN) -
505: $ WORK(IU2SN+IMIN-1)*B22D(IMIN)
506: B22D(IMIN) = TEMP
507: B22BULGE = WORK(IU2SN+IMIN-1)*B22D(IMIN+1)
508: B22D(IMIN+1) = WORK(IU2CS+IMIN-1)*B22D(IMIN+1)
509: *
510: * Inner loop: chase bulges from B11(IMIN,IMIN+2),
511: * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
512: * bottom-right
513: *
514: DO I = IMIN+1, IMAX-1
515: *
516: * Compute PHI(I-1)
517: *
518: X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1)
519: X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
520: Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1)
521: Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
522: *
523: PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
524: *
525: * Determine if there are bulges to chase or if a new direct
526: * summand has been reached
527: *
528: RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
529: RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
530: RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
531: RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
532: *
533: * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
534: * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
535: * chasing by applying the original shift again.
536: *
537: IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
538: CALL DLARTGP( X2, X1, WORK(IV1TSN+I-1), WORK(IV1TCS+I-1),
539: $ R )
540: ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN
541: CALL DLARTGP( B11BULGE, B11E(I-1), WORK(IV1TSN+I-1),
542: $ WORK(IV1TCS+I-1), R )
543: ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
544: CALL DLARTGP( B21BULGE, B21E(I-1), WORK(IV1TSN+I-1),
545: $ WORK(IV1TCS+I-1), R )
546: ELSE IF( MU .LE. NU ) THEN
547: CALL DLARTGS( B11D(I), B11E(I), MU, WORK(IV1TCS+I-1),
548: $ WORK(IV1TSN+I-1) )
549: ELSE
550: CALL DLARTGS( B21D(I), B21E(I), NU, WORK(IV1TCS+I-1),
551: $ WORK(IV1TSN+I-1) )
552: END IF
553: WORK(IV1TCS+I-1) = -WORK(IV1TCS+I-1)
554: WORK(IV1TSN+I-1) = -WORK(IV1TSN+I-1)
555: IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
556: CALL DLARTGP( Y2, Y1, WORK(IV2TSN+I-1-1),
557: $ WORK(IV2TCS+I-1-1), R )
558: ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
559: CALL DLARTGP( B12BULGE, B12D(I-1), WORK(IV2TSN+I-1-1),
560: $ WORK(IV2TCS+I-1-1), R )
561: ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
562: CALL DLARTGP( B22BULGE, B22D(I-1), WORK(IV2TSN+I-1-1),
563: $ WORK(IV2TCS+I-1-1), R )
564: ELSE IF( NU .LT. MU ) THEN
565: CALL DLARTGS( B12E(I-1), B12D(I), NU, WORK(IV2TCS+I-1-1),
566: $ WORK(IV2TSN+I-1-1) )
567: ELSE
568: CALL DLARTGS( B22E(I-1), B22D(I), MU, WORK(IV2TCS+I-1-1),
569: $ WORK(IV2TSN+I-1-1) )
570: END IF
571: *
572: TEMP = WORK(IV1TCS+I-1)*B11D(I) + WORK(IV1TSN+I-1)*B11E(I)
573: B11E(I) = WORK(IV1TCS+I-1)*B11E(I) -
574: $ WORK(IV1TSN+I-1)*B11D(I)
575: B11D(I) = TEMP
576: B11BULGE = WORK(IV1TSN+I-1)*B11D(I+1)
577: B11D(I+1) = WORK(IV1TCS+I-1)*B11D(I+1)
578: TEMP = WORK(IV1TCS+I-1)*B21D(I) + WORK(IV1TSN+I-1)*B21E(I)
579: B21E(I) = WORK(IV1TCS+I-1)*B21E(I) -
580: $ WORK(IV1TSN+I-1)*B21D(I)
581: B21D(I) = TEMP
582: B21BULGE = WORK(IV1TSN+I-1)*B21D(I+1)
583: B21D(I+1) = WORK(IV1TCS+I-1)*B21D(I+1)
584: TEMP = WORK(IV2TCS+I-1-1)*B12E(I-1) +
585: $ WORK(IV2TSN+I-1-1)*B12D(I)
586: B12D(I) = WORK(IV2TCS+I-1-1)*B12D(I) -
587: $ WORK(IV2TSN+I-1-1)*B12E(I-1)
588: B12E(I-1) = TEMP
589: B12BULGE = WORK(IV2TSN+I-1-1)*B12E(I)
590: B12E(I) = WORK(IV2TCS+I-1-1)*B12E(I)
591: TEMP = WORK(IV2TCS+I-1-1)*B22E(I-1) +
592: $ WORK(IV2TSN+I-1-1)*B22D(I)
593: B22D(I) = WORK(IV2TCS+I-1-1)*B22D(I) -
594: $ WORK(IV2TSN+I-1-1)*B22E(I-1)
595: B22E(I-1) = TEMP
596: B22BULGE = WORK(IV2TSN+I-1-1)*B22E(I)
597: B22E(I) = WORK(IV2TCS+I-1-1)*B22E(I)
598: *
599: * Compute THETA(I)
600: *
601: X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
602: X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
603: Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
604: Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
605: *
606: THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
607: *
608: * Determine if there are bulges to chase or if a new direct
609: * summand has been reached
610: *
611: RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
612: RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
613: RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
614: RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
615: *
616: * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
617: * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
618: * chasing by applying the original shift again.
619: *
620: IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
621: CALL DLARTGP( X2, X1, WORK(IU1SN+I-1), WORK(IU1CS+I-1),
622: $ R )
623: ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN
624: CALL DLARTGP( B11BULGE, B11D(I), WORK(IU1SN+I-1),
625: $ WORK(IU1CS+I-1), R )
626: ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
627: CALL DLARTGP( B12BULGE, B12E(I-1), WORK(IU1SN+I-1),
628: $ WORK(IU1CS+I-1), R )
629: ELSE IF( MU .LE. NU ) THEN
630: CALL DLARTGS( B11E(I), B11D(I+1), MU, WORK(IU1CS+I-1),
631: $ WORK(IU1SN+I-1) )
632: ELSE
633: CALL DLARTGS( B12D(I), B12E(I), NU, WORK(IU1CS+I-1),
634: $ WORK(IU1SN+I-1) )
635: END IF
636: IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
637: CALL DLARTGP( Y2, Y1, WORK(IU2SN+I-1), WORK(IU2CS+I-1),
638: $ R )
639: ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN
640: CALL DLARTGP( B21BULGE, B21D(I), WORK(IU2SN+I-1),
641: $ WORK(IU2CS+I-1), R )
642: ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
643: CALL DLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1),
644: $ WORK(IU2CS+I-1), R )
645: ELSE IF( NU .LT. MU ) THEN
646: CALL DLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1),
647: $ WORK(IU2SN+I-1) )
648: ELSE
649: CALL DLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1),
650: $ WORK(IU2SN+I-1) )
651: END IF
652: WORK(IU2CS+I-1) = -WORK(IU2CS+I-1)
653: WORK(IU2SN+I-1) = -WORK(IU2SN+I-1)
654: *
655: TEMP = WORK(IU1CS+I-1)*B11E(I) + WORK(IU1SN+I-1)*B11D(I+1)
656: B11D(I+1) = WORK(IU1CS+I-1)*B11D(I+1) -
657: $ WORK(IU1SN+I-1)*B11E(I)
658: B11E(I) = TEMP
659: IF( I .LT. IMAX - 1 ) THEN
660: B11BULGE = WORK(IU1SN+I-1)*B11E(I+1)
661: B11E(I+1) = WORK(IU1CS+I-1)*B11E(I+1)
662: END IF
663: TEMP = WORK(IU2CS+I-1)*B21E(I) + WORK(IU2SN+I-1)*B21D(I+1)
664: B21D(I+1) = WORK(IU2CS+I-1)*B21D(I+1) -
665: $ WORK(IU2SN+I-1)*B21E(I)
666: B21E(I) = TEMP
667: IF( I .LT. IMAX - 1 ) THEN
668: B21BULGE = WORK(IU2SN+I-1)*B21E(I+1)
669: B21E(I+1) = WORK(IU2CS+I-1)*B21E(I+1)
670: END IF
671: TEMP = WORK(IU1CS+I-1)*B12D(I) + WORK(IU1SN+I-1)*B12E(I)
672: B12E(I) = WORK(IU1CS+I-1)*B12E(I) - WORK(IU1SN+I-1)*B12D(I)
673: B12D(I) = TEMP
674: B12BULGE = WORK(IU1SN+I-1)*B12D(I+1)
675: B12D(I+1) = WORK(IU1CS+I-1)*B12D(I+1)
676: TEMP = WORK(IU2CS+I-1)*B22D(I) + WORK(IU2SN+I-1)*B22E(I)
677: B22E(I) = WORK(IU2CS+I-1)*B22E(I) - WORK(IU2SN+I-1)*B22D(I)
678: B22D(I) = TEMP
679: B22BULGE = WORK(IU2SN+I-1)*B22D(I+1)
680: B22D(I+1) = WORK(IU2CS+I-1)*B22D(I+1)
681: *
682: END DO
683: *
684: * Compute PHI(IMAX-1)
685: *
686: X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) +
687: $ COS(THETA(IMAX-1))*B21E(IMAX-1)
688: Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) +
689: $ COS(THETA(IMAX-1))*B22D(IMAX-1)
690: Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE
691: *
692: PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) )
693: *
694: * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
695: *
696: RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
697: RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
698: *
699: IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
700: CALL DLARTGP( Y2, Y1, WORK(IV2TSN+IMAX-1-1),
701: $ WORK(IV2TCS+IMAX-1-1), R )
702: ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
703: CALL DLARTGP( B12BULGE, B12D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
704: $ WORK(IV2TCS+IMAX-1-1), R )
705: ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
706: CALL DLARTGP( B22BULGE, B22D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
707: $ WORK(IV2TCS+IMAX-1-1), R )
708: ELSE IF( NU .LT. MU ) THEN
709: CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
710: $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
711: ELSE
712: CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
713: $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
714: END IF
715: *
716: TEMP = WORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) +
717: $ WORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
718: B12D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
719: $ WORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
720: B12E(IMAX-1) = TEMP
721: TEMP = WORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) +
722: $ WORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
723: B22D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
724: $ WORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
725: B22E(IMAX-1) = TEMP
726: *
727: * Update singular vectors
728: *
729: IF( WANTU1 ) THEN
730: IF( COLMAJOR ) THEN
731: CALL DLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
732: $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
733: $ U1(1,IMIN), LDU1 )
734: ELSE
735: CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
736: $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
737: $ U1(IMIN,1), LDU1 )
738: END IF
739: END IF
740: IF( WANTU2 ) THEN
741: IF( COLMAJOR ) THEN
742: CALL DLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
743: $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
744: $ U2(1,IMIN), LDU2 )
745: ELSE
746: CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
747: $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
748: $ U2(IMIN,1), LDU2 )
749: END IF
750: END IF
751: IF( WANTV1T ) THEN
752: IF( COLMAJOR ) THEN
753: CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
754: $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
755: $ V1T(IMIN,1), LDV1T )
756: ELSE
757: CALL DLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
758: $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
759: $ V1T(1,IMIN), LDV1T )
760: END IF
761: END IF
762: IF( WANTV2T ) THEN
763: IF( COLMAJOR ) THEN
764: CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
765: $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
766: $ V2T(IMIN,1), LDV2T )
767: ELSE
768: CALL DLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
769: $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
770: $ V2T(1,IMIN), LDV2T )
771: END IF
772: END IF
773: *
774: * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
775: *
776: IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN
777: B11D(IMAX) = -B11D(IMAX)
778: B21D(IMAX) = -B21D(IMAX)
779: IF( WANTV1T ) THEN
780: IF( COLMAJOR ) THEN
781: CALL DSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T )
782: ELSE
783: CALL DSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 )
784: END IF
785: END IF
786: END IF
787: *
788: * Compute THETA(IMAX)
789: *
790: X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
791: $ SIN(PHI(IMAX-1))*B12E(IMAX-1)
792: Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
793: $ SIN(PHI(IMAX-1))*B22E(IMAX-1)
794: *
795: THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
796: *
797: * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
798: * and B22(IMAX,IMAX-1)
799: *
800: IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN
801: B12D(IMAX) = -B12D(IMAX)
802: IF( WANTU1 ) THEN
803: IF( COLMAJOR ) THEN
804: CALL DSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 )
805: ELSE
806: CALL DSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 )
807: END IF
808: END IF
809: END IF
810: IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN
811: B22D(IMAX) = -B22D(IMAX)
812: IF( WANTU2 ) THEN
813: IF( COLMAJOR ) THEN
814: CALL DSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 )
815: ELSE
816: CALL DSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 )
817: END IF
818: END IF
819: END IF
820: *
821: * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
822: *
823: IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN
824: IF( WANTV2T ) THEN
825: IF( COLMAJOR ) THEN
826: CALL DSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T )
827: ELSE
828: CALL DSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 )
829: END IF
830: END IF
831: END IF
832: *
833: * Test for negligible sines or cosines
834: *
835: DO I = IMIN, IMAX
836: IF( THETA(I) .LT. THRESH ) THEN
837: THETA(I) = ZERO
838: ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
839: THETA(I) = PIOVER2
840: END IF
841: END DO
842: DO I = IMIN, IMAX-1
843: IF( PHI(I) .LT. THRESH ) THEN
844: PHI(I) = ZERO
845: ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
846: PHI(I) = PIOVER2
847: END IF
848: END DO
849: *
850: * Deflate
851: *
852: IF (IMAX .GT. 1) THEN
853: DO WHILE( PHI(IMAX-1) .EQ. ZERO )
854: IMAX = IMAX - 1
855: IF (IMAX .LE. 1) EXIT
856: END DO
857: END IF
858: IF( IMIN .GT. IMAX - 1 )
859: $ IMIN = IMAX - 1
860: IF (IMIN .GT. 1) THEN
861: DO WHILE (PHI(IMIN-1) .NE. ZERO)
862: IMIN = IMIN - 1
863: IF (IMIN .LE. 1) EXIT
864: END DO
865: END IF
866: *
867: * Repeat main iteration loop
868: *
869: END DO
870: *
871: * Postprocessing: order THETA from least to greatest
872: *
873: DO I = 1, Q
874: *
875: MINI = I
876: THETAMIN = THETA(I)
877: DO J = I+1, Q
878: IF( THETA(J) .LT. THETAMIN ) THEN
879: MINI = J
880: THETAMIN = THETA(J)
881: END IF
882: END DO
883: *
884: IF( MINI .NE. I ) THEN
885: THETA(MINI) = THETA(I)
886: THETA(I) = THETAMIN
887: IF( COLMAJOR ) THEN
888: IF( WANTU1 )
889: $ CALL DSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
890: IF( WANTU2 )
891: $ CALL DSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
892: IF( WANTV1T )
893: $ CALL DSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
894: IF( WANTV2T )
895: $ CALL DSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
896: $ LDV2T )
897: ELSE
898: IF( WANTU1 )
899: $ CALL DSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
900: IF( WANTU2 )
901: $ CALL DSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
902: IF( WANTV1T )
903: $ CALL DSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
904: IF( WANTV2T )
905: $ CALL DSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )
906: END IF
907: END IF
908: *
909: END DO
910: *
911: RETURN
912: *
913: * End of DBBCSD
914: *
915: END
916:
CVSweb interface <joel.bertrand@systella.fr>