1: SUBROUTINE ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
2: $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
3: $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
4: $ B22D, B22E, RWORK, LRWORK, INFO )
5: IMPLICIT NONE
6: *
7: * -- LAPACK routine (version 3.3.0) --
8: *
9: * -- Contributed by Brian Sutton of the Randolph-Macon College --
10: * -- November 2010
11: *
12: * -- LAPACK is a software package provided by Univ. of Tennessee, --
13: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
14: *
15: * .. Scalar Arguments ..
16: CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
17: INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q
18: * ..
19: * .. Array Arguments ..
20: DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ),
21: $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
22: $ PHI( * ), THETA( * ), RWORK( * )
23: COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
24: $ V2T( LDV2T, * )
25: * ..
26: *
27: * Purpose
28: * =======
29: *
30: * ZBBCSD computes the CS decomposition of a unitary matrix in
31: * bidiagonal-block form,
32: *
33: *
34: * [ B11 | B12 0 0 ]
35: * [ 0 | 0 -I 0 ]
36: * X = [----------------]
37: * [ B21 | B22 0 0 ]
38: * [ 0 | 0 0 I ]
39: *
40: * [ C | -S 0 0 ]
41: * [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H
42: * = [---------] [---------------] [---------] .
43: * [ | U2 ] [ S | C 0 0 ] [ | V2 ]
44: * [ 0 | 0 0 I ]
45: *
46: * X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
47: * than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
48: * transposed and/or permuted. This can be done in constant time using
49: * the TRANS and SIGNS options. See ZUNCSD for details.)
50: *
51: * The bidiagonal matrices B11, B12, B21, and B22 are represented
52: * implicitly by angles THETA(1:Q) and PHI(1:Q-1).
53: *
54: * The unitary matrices U1, U2, V1T, and V2T are input/output.
55: * The input matrices are pre- or post-multiplied by the appropriate
56: * singular vector matrices.
57: *
58: * Arguments
59: * =========
60: *
61: * JOBU1 (input) CHARACTER
62: * = 'Y': U1 is updated;
63: * otherwise: U1 is not updated.
64: *
65: * JOBU2 (input) CHARACTER
66: * = 'Y': U2 is updated;
67: * otherwise: U2 is not updated.
68: *
69: * JOBV1T (input) CHARACTER
70: * = 'Y': V1T is updated;
71: * otherwise: V1T is not updated.
72: *
73: * JOBV2T (input) CHARACTER
74: * = 'Y': V2T is updated;
75: * otherwise: V2T is not updated.
76: *
77: * TRANS (input) CHARACTER
78: * = 'T': X, U1, U2, V1T, and V2T are stored in row-major
79: * order;
80: * otherwise: X, U1, U2, V1T, and V2T are stored in column-
81: * major order.
82: *
83: * M (input) INTEGER
84: * The number of rows and columns in X, the unitary matrix in
85: * bidiagonal-block form.
86: *
87: * P (input) INTEGER
88: * The number of rows in the top-left block of X. 0 <= P <= M.
89: *
90: * Q (input) INTEGER
91: * The number of columns in the top-left block of X.
92: * 0 <= Q <= MIN(P,M-P,M-Q).
93: *
94: * THETA (input/output) DOUBLE PRECISION array, dimension (Q)
95: * On entry, the angles THETA(1),...,THETA(Q) that, along with
96: * PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
97: * form. On exit, the angles whose cosines and sines define the
98: * diagonal blocks in the CS decomposition.
99: *
100: * PHI (input/workspace) DOUBLE PRECISION array, dimension (Q-1)
101: * The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
102: * THETA(Q), define the matrix in bidiagonal-block form.
103: *
104: * U1 (input/output) COMPLEX*16 array, dimension (LDU1,P)
105: * On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
106: * by the left singular vector matrix common to [ B11 ; 0 ] and
107: * [ B12 0 0 ; 0 -I 0 0 ].
108: *
109: * LDU1 (input) INTEGER
110: * The leading dimension of the array U1.
111: *
112: * U2 (input/output) COMPLEX*16 array, dimension (LDU2,M-P)
113: * On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
114: * postmultiplied by the left singular vector matrix common to
115: * [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
116: *
117: * LDU2 (input) INTEGER
118: * The leading dimension of the array U2.
119: *
120: * V1T (input/output) COMPLEX*16 array, dimension (LDV1T,Q)
121: * On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
122: * by the conjugate transpose of the right singular vector
123: * matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
124: *
125: * LDV1T (input) INTEGER
126: * The leading dimension of the array V1T.
127: *
128: * V2T (input/output) COMPLEX*16 array, dimenison (LDV2T,M-Q)
129: * On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
130: * premultiplied by the conjugate transpose of the right
131: * singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
132: * [ B22 0 0 ; 0 0 I ].
133: *
134: * LDV2T (input) INTEGER
135: * The leading dimension of the array V2T.
136: *
137: * B11D (output) DOUBLE PRECISION array, dimension (Q)
138: * When ZBBCSD converges, B11D contains the cosines of THETA(1),
139: * ..., THETA(Q). If ZBBCSD fails to converge, then B11D
140: * contains the diagonal of the partially reduced top-left
141: * block.
142: *
143: * B11E (output) DOUBLE PRECISION array, dimension (Q-1)
144: * When ZBBCSD converges, B11E contains zeros. If ZBBCSD fails
145: * to converge, then B11E contains the superdiagonal of the
146: * partially reduced top-left block.
147: *
148: * B12D (output) DOUBLE PRECISION array, dimension (Q)
149: * When ZBBCSD converges, B12D contains the negative sines of
150: * THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then
151: * B12D contains the diagonal of the partially reduced top-right
152: * block.
153: *
154: * B12E (output) DOUBLE PRECISION array, dimension (Q-1)
155: * When ZBBCSD converges, B12E contains zeros. If ZBBCSD fails
156: * to converge, then B12E contains the subdiagonal of the
157: * partially reduced top-right block.
158: *
159: * RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
160: * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
161: *
162: * LRWORK (input) INTEGER
163: * The dimension of the array RWORK. LRWORK >= MAX(1,8*Q).
164: *
165: * If LRWORK = -1, then a workspace query is assumed; the
166: * routine only calculates the optimal size of the RWORK array,
167: * returns this value as the first entry of the work array, and
168: * no error message related to LRWORK is issued by XERBLA.
169: *
170: * INFO (output) INTEGER
171: * = 0: successful exit.
172: * < 0: if INFO = -i, the i-th argument had an illegal value.
173: * > 0: if ZBBCSD did not converge, INFO specifies the number
174: * of nonzero entries in PHI, and B11D, B11E, etc.,
175: * contain the partially reduced matrix.
176: *
177: * Reference
178: * =========
179: *
180: * [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
181: * Algorithms, 50(1):33-65, 2009.
182: *
183: * Internal Parameters
184: * ===================
185: *
186: * TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
187: * TOLMUL controls the convergence criterion of the QR loop.
188: * Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
189: * are within TOLMUL*EPS of either bound.
190: *
191: * ===================================================================
192: *
193: * .. Parameters ..
194: INTEGER MAXITR
195: PARAMETER ( MAXITR = 6 )
196: DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
197: PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
198: $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
199: $ TEN = 10.0D0, ZERO = 0.0D0 )
200: COMPLEX*16 NEGONECOMPLEX
201: PARAMETER ( NEGONECOMPLEX = (-1.0D0,0.0D0) )
202: * ..
203: * .. Local Scalars ..
204: LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12,
205: $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
206: $ WANTV2T
207: INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
208: $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
209: $ LRWORKMIN, LRWORKOPT, MAXIT, MINI
210: DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
211: $ EPS, MU, NU, R, SIGMA11, SIGMA21,
212: $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
213: $ UNFL, X1, X2, Y1, Y2
214: *
215: EXTERNAL DLARTGP, DLARTGS, DLAS2, XERBLA, ZLASR, ZSCAL,
216: $ ZSWAP
217: * ..
218: * .. External Functions ..
219: DOUBLE PRECISION DLAMCH
220: LOGICAL LSAME
221: EXTERNAL LSAME, DLAMCH
222: * ..
223: * .. Intrinsic Functions ..
224: INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT
225: * ..
226: * .. Executable Statements ..
227: *
228: * Test input arguments
229: *
230: INFO = 0
231: LQUERY = LRWORK .EQ. -1
232: WANTU1 = LSAME( JOBU1, 'Y' )
233: WANTU2 = LSAME( JOBU2, 'Y' )
234: WANTV1T = LSAME( JOBV1T, 'Y' )
235: WANTV2T = LSAME( JOBV2T, 'Y' )
236: COLMAJOR = .NOT. LSAME( TRANS, 'T' )
237: *
238: IF( M .LT. 0 ) THEN
239: INFO = -6
240: ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
241: INFO = -7
242: ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
243: INFO = -8
244: ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN
245: INFO = -8
246: ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
247: INFO = -12
248: ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
249: INFO = -14
250: ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
251: INFO = -16
252: ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
253: INFO = -18
254: END IF
255: *
256: * Quick return if Q = 0
257: *
258: IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
259: LRWORKMIN = 1
260: RWORK(1) = LRWORKMIN
261: RETURN
262: END IF
263: *
264: * Compute workspace
265: *
266: IF( INFO .EQ. 0 ) THEN
267: IU1CS = 1
268: IU1SN = IU1CS + Q
269: IU2CS = IU1SN + Q
270: IU2SN = IU2CS + Q
271: IV1TCS = IU2SN + Q
272: IV1TSN = IV1TCS + Q
273: IV2TCS = IV1TSN + Q
274: IV2TSN = IV2TCS + Q
275: LRWORKOPT = IV2TSN + Q - 1
276: LRWORKMIN = LRWORKOPT
277: RWORK(1) = LRWORKOPT
278: IF( LRWORK .LT. LRWORKMIN .AND. .NOT. LQUERY ) THEN
279: INFO = -28
280: END IF
281: END IF
282: *
283: IF( INFO .NE. 0 ) THEN
284: CALL XERBLA( 'ZBBCSD', -INFO )
285: RETURN
286: ELSE IF( LQUERY ) THEN
287: RETURN
288: END IF
289: *
290: * Get machine constants
291: *
292: EPS = DLAMCH( 'Epsilon' )
293: UNFL = DLAMCH( 'Safe minimum' )
294: TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
295: TOL = TOLMUL*EPS
296: THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
297: *
298: * Test for negligible sines or cosines
299: *
300: DO I = 1, Q
301: IF( THETA(I) .LT. THRESH ) THEN
302: THETA(I) = ZERO
303: ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
304: THETA(I) = PIOVER2
305: END IF
306: END DO
307: DO I = 1, Q-1
308: IF( PHI(I) .LT. THRESH ) THEN
309: PHI(I) = ZERO
310: ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
311: PHI(I) = PIOVER2
312: END IF
313: END DO
314: *
315: * Initial deflation
316: *
317: IMAX = Q
318: DO WHILE( ( IMAX .GT. 1 ) .AND. ( PHI(IMAX-1) .EQ. ZERO ) )
319: IMAX = IMAX - 1
320: END DO
321: IMIN = IMAX - 1
322: IF ( IMIN .GT. 1 ) THEN
323: DO WHILE( PHI(IMIN-1) .NE. ZERO )
324: IMIN = IMIN - 1
325: IF ( IMIN .LE. 1 ) EXIT
326: END DO
327: END IF
328: *
329: * Initialize iteration counter
330: *
331: MAXIT = MAXITR*Q*Q
332: ITER = 0
333: *
334: * Begin main iteration loop
335: *
336: DO WHILE( IMAX .GT. 1 )
337: *
338: * Compute the matrix entries
339: *
340: B11D(IMIN) = COS( THETA(IMIN) )
341: B21D(IMIN) = -SIN( THETA(IMIN) )
342: DO I = IMIN, IMAX - 1
343: B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
344: B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) )
345: B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
346: B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
347: B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
348: B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) )
349: B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
350: B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
351: END DO
352: B12D(IMAX) = SIN( THETA(IMAX) )
353: B22D(IMAX) = COS( THETA(IMAX) )
354: *
355: * Abort if not converging; otherwise, increment ITER
356: *
357: IF( ITER .GT. MAXIT ) THEN
358: INFO = 0
359: DO I = 1, Q
360: IF( PHI(I) .NE. ZERO )
361: $ INFO = INFO + 1
362: END DO
363: RETURN
364: END IF
365: *
366: ITER = ITER + IMAX - IMIN
367: *
368: * Compute shifts
369: *
370: THETAMAX = THETA(IMIN)
371: THETAMIN = THETA(IMIN)
372: DO I = IMIN+1, IMAX
373: IF( THETA(I) > THETAMAX )
374: $ THETAMAX = THETA(I)
375: IF( THETA(I) < THETAMIN )
376: $ THETAMIN = THETA(I)
377: END DO
378: *
379: IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
380: *
381: * Zero on diagonals of B11 and B22; induce deflation with a
382: * zero shift
383: *
384: MU = ZERO
385: NU = ONE
386: *
387: ELSE IF( THETAMIN .LT. THRESH ) THEN
388: *
389: * Zero on diagonals of B12 and B22; induce deflation with a
390: * zero shift
391: *
392: MU = ONE
393: NU = ZERO
394: *
395: ELSE
396: *
397: * Compute shifts for B11 and B21 and use the lesser
398: *
399: CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11,
400: $ DUMMY )
401: CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21,
402: $ DUMMY )
403: *
404: IF( SIGMA11 .LE. SIGMA21 ) THEN
405: MU = SIGMA11
406: NU = SQRT( ONE - MU**2 )
407: IF( MU .LT. THRESH ) THEN
408: MU = ZERO
409: NU = ONE
410: END IF
411: ELSE
412: NU = SIGMA21
413: MU = SQRT( 1.0 - NU**2 )
414: IF( NU .LT. THRESH ) THEN
415: MU = ONE
416: NU = ZERO
417: END IF
418: END IF
419: END IF
420: *
421: * Rotate to produce bulges in B11 and B21
422: *
423: IF( MU .LE. NU ) THEN
424: CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
425: $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
426: ELSE
427: CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
428: $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
429: END IF
430: *
431: TEMP = RWORK(IV1TCS+IMIN-1)*B11D(IMIN) +
432: $ RWORK(IV1TSN+IMIN-1)*B11E(IMIN)
433: B11E(IMIN) = RWORK(IV1TCS+IMIN-1)*B11E(IMIN) -
434: $ RWORK(IV1TSN+IMIN-1)*B11D(IMIN)
435: B11D(IMIN) = TEMP
436: B11BULGE = RWORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
437: B11D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
438: TEMP = RWORK(IV1TCS+IMIN-1)*B21D(IMIN) +
439: $ RWORK(IV1TSN+IMIN-1)*B21E(IMIN)
440: B21E(IMIN) = RWORK(IV1TCS+IMIN-1)*B21E(IMIN) -
441: $ RWORK(IV1TSN+IMIN-1)*B21D(IMIN)
442: B21D(IMIN) = TEMP
443: B21BULGE = RWORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
444: B21D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
445: *
446: * Compute THETA(IMIN)
447: *
448: THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ),
449: $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
450: *
451: * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
452: *
453: IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
454: CALL DLARTGP( B11BULGE, B11D(IMIN), RWORK(IU1SN+IMIN-1),
455: $ RWORK(IU1CS+IMIN-1), R )
456: ELSE IF( MU .LE. NU ) THEN
457: CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
458: $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
459: ELSE
460: CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
461: $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
462: END IF
463: IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
464: CALL DLARTGP( B21BULGE, B21D(IMIN), RWORK(IU2SN+IMIN-1),
465: $ RWORK(IU2CS+IMIN-1), R )
466: ELSE IF( NU .LT. MU ) THEN
467: CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
468: $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
469: ELSE
470: CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
471: $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
472: END IF
473: RWORK(IU2CS+IMIN-1) = -RWORK(IU2CS+IMIN-1)
474: RWORK(IU2SN+IMIN-1) = -RWORK(IU2SN+IMIN-1)
475: *
476: TEMP = RWORK(IU1CS+IMIN-1)*B11E(IMIN) +
477: $ RWORK(IU1SN+IMIN-1)*B11D(IMIN+1)
478: B11D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11D(IMIN+1) -
479: $ RWORK(IU1SN+IMIN-1)*B11E(IMIN)
480: B11E(IMIN) = TEMP
481: IF( IMAX .GT. IMIN+1 ) THEN
482: B11BULGE = RWORK(IU1SN+IMIN-1)*B11E(IMIN+1)
483: B11E(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11E(IMIN+1)
484: END IF
485: TEMP = RWORK(IU1CS+IMIN-1)*B12D(IMIN) +
486: $ RWORK(IU1SN+IMIN-1)*B12E(IMIN)
487: B12E(IMIN) = RWORK(IU1CS+IMIN-1)*B12E(IMIN) -
488: $ RWORK(IU1SN+IMIN-1)*B12D(IMIN)
489: B12D(IMIN) = TEMP
490: B12BULGE = RWORK(IU1SN+IMIN-1)*B12D(IMIN+1)
491: B12D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B12D(IMIN+1)
492: TEMP = RWORK(IU2CS+IMIN-1)*B21E(IMIN) +
493: $ RWORK(IU2SN+IMIN-1)*B21D(IMIN+1)
494: B21D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21D(IMIN+1) -
495: $ RWORK(IU2SN+IMIN-1)*B21E(IMIN)
496: B21E(IMIN) = TEMP
497: IF( IMAX .GT. IMIN+1 ) THEN
498: B21BULGE = RWORK(IU2SN+IMIN-1)*B21E(IMIN+1)
499: B21E(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21E(IMIN+1)
500: END IF
501: TEMP = RWORK(IU2CS+IMIN-1)*B22D(IMIN) +
502: $ RWORK(IU2SN+IMIN-1)*B22E(IMIN)
503: B22E(IMIN) = RWORK(IU2CS+IMIN-1)*B22E(IMIN) -
504: $ RWORK(IU2SN+IMIN-1)*B22D(IMIN)
505: B22D(IMIN) = TEMP
506: B22BULGE = RWORK(IU2SN+IMIN-1)*B22D(IMIN+1)
507: B22D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B22D(IMIN+1)
508: *
509: * Inner loop: chase bulges from B11(IMIN,IMIN+2),
510: * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
511: * bottom-right
512: *
513: DO I = IMIN+1, IMAX-1
514: *
515: * Compute PHI(I-1)
516: *
517: X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1)
518: X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
519: Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1)
520: Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
521: *
522: PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
523: *
524: * Determine if there are bulges to chase or if a new direct
525: * summand has been reached
526: *
527: RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
528: RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
529: RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
530: RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
531: *
532: * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
533: * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
534: * chasing by applying the original shift again.
535: *
536: IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
537: CALL DLARTGP( X2, X1, RWORK(IV1TSN+I-1),
538: $ RWORK(IV1TCS+I-1), R )
539: ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN
540: CALL DLARTGP( B11BULGE, B11E(I-1), RWORK(IV1TSN+I-1),
541: $ RWORK(IV1TCS+I-1), R )
542: ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
543: CALL DLARTGP( B21BULGE, B21E(I-1), RWORK(IV1TSN+I-1),
544: $ RWORK(IV1TCS+I-1), R )
545: ELSE IF( MU .LE. NU ) THEN
546: CALL DLARTGS( B11D(I), B11E(I), MU, RWORK(IV1TCS+I-1),
547: $ RWORK(IV1TSN+I-1) )
548: ELSE
549: CALL DLARTGS( B21D(I), B21E(I), NU, RWORK(IV1TCS+I-1),
550: $ RWORK(IV1TSN+I-1) )
551: END IF
552: RWORK(IV1TCS+I-1) = -RWORK(IV1TCS+I-1)
553: RWORK(IV1TSN+I-1) = -RWORK(IV1TSN+I-1)
554: IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
555: CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+I-1-1),
556: $ RWORK(IV2TCS+I-1-1), R )
557: ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
558: CALL DLARTGP( B12BULGE, B12D(I-1), RWORK(IV2TSN+I-1-1),
559: $ RWORK(IV2TCS+I-1-1), R )
560: ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
561: CALL DLARTGP( B22BULGE, B22D(I-1), RWORK(IV2TSN+I-1-1),
562: $ RWORK(IV2TCS+I-1-1), R )
563: ELSE IF( NU .LT. MU ) THEN
564: CALL DLARTGS( B12E(I-1), B12D(I), NU,
565: $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
566: ELSE
567: CALL DLARTGS( B22E(I-1), B22D(I), MU,
568: $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
569: END IF
570: *
571: TEMP = RWORK(IV1TCS+I-1)*B11D(I) + RWORK(IV1TSN+I-1)*B11E(I)
572: B11E(I) = RWORK(IV1TCS+I-1)*B11E(I) -
573: $ RWORK(IV1TSN+I-1)*B11D(I)
574: B11D(I) = TEMP
575: B11BULGE = RWORK(IV1TSN+I-1)*B11D(I+1)
576: B11D(I+1) = RWORK(IV1TCS+I-1)*B11D(I+1)
577: TEMP = RWORK(IV1TCS+I-1)*B21D(I) + RWORK(IV1TSN+I-1)*B21E(I)
578: B21E(I) = RWORK(IV1TCS+I-1)*B21E(I) -
579: $ RWORK(IV1TSN+I-1)*B21D(I)
580: B21D(I) = TEMP
581: B21BULGE = RWORK(IV1TSN+I-1)*B21D(I+1)
582: B21D(I+1) = RWORK(IV1TCS+I-1)*B21D(I+1)
583: TEMP = RWORK(IV2TCS+I-1-1)*B12E(I-1) +
584: $ RWORK(IV2TSN+I-1-1)*B12D(I)
585: B12D(I) = RWORK(IV2TCS+I-1-1)*B12D(I) -
586: $ RWORK(IV2TSN+I-1-1)*B12E(I-1)
587: B12E(I-1) = TEMP
588: B12BULGE = RWORK(IV2TSN+I-1-1)*B12E(I)
589: B12E(I) = RWORK(IV2TCS+I-1-1)*B12E(I)
590: TEMP = RWORK(IV2TCS+I-1-1)*B22E(I-1) +
591: $ RWORK(IV2TSN+I-1-1)*B22D(I)
592: B22D(I) = RWORK(IV2TCS+I-1-1)*B22D(I) -
593: $ RWORK(IV2TSN+I-1-1)*B22E(I-1)
594: B22E(I-1) = TEMP
595: B22BULGE = RWORK(IV2TSN+I-1-1)*B22E(I)
596: B22E(I) = RWORK(IV2TCS+I-1-1)*B22E(I)
597: *
598: * Compute THETA(I)
599: *
600: X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
601: X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
602: Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
603: Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
604: *
605: THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
606: *
607: * Determine if there are bulges to chase or if a new direct
608: * summand has been reached
609: *
610: RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
611: RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
612: RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
613: RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
614: *
615: * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
616: * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
617: * chasing by applying the original shift again.
618: *
619: IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
620: CALL DLARTGP( X2, X1, RWORK(IU1SN+I-1), RWORK(IU1CS+I-1),
621: $ R )
622: ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN
623: CALL DLARTGP( B11BULGE, B11D(I), RWORK(IU1SN+I-1),
624: $ RWORK(IU1CS+I-1), R )
625: ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
626: CALL DLARTGP( B12BULGE, B12E(I-1), RWORK(IU1SN+I-1),
627: $ RWORK(IU1CS+I-1), R )
628: ELSE IF( MU .LE. NU ) THEN
629: CALL DLARTGS( B11E(I), B11D(I+1), MU, RWORK(IU1CS+I-1),
630: $ RWORK(IU1SN+I-1) )
631: ELSE
632: CALL DLARTGS( B12D(I), B12E(I), NU, RWORK(IU1CS+I-1),
633: $ RWORK(IU1SN+I-1) )
634: END IF
635: IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
636: CALL DLARTGP( Y2, Y1, RWORK(IU2SN+I-1), RWORK(IU2CS+I-1),
637: $ R )
638: ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN
639: CALL DLARTGP( B21BULGE, B21D(I), RWORK(IU2SN+I-1),
640: $ RWORK(IU2CS+I-1), R )
641: ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
642: CALL DLARTGP( B22BULGE, B22E(I-1), RWORK(IU2SN+I-1),
643: $ RWORK(IU2CS+I-1), R )
644: ELSE IF( NU .LT. MU ) THEN
645: CALL DLARTGS( B21E(I), B21E(I+1), NU, RWORK(IU2CS+I-1),
646: $ RWORK(IU2SN+I-1) )
647: ELSE
648: CALL DLARTGS( B22D(I), B22E(I), MU, RWORK(IU2CS+I-1),
649: $ RWORK(IU2SN+I-1) )
650: END IF
651: RWORK(IU2CS+I-1) = -RWORK(IU2CS+I-1)
652: RWORK(IU2SN+I-1) = -RWORK(IU2SN+I-1)
653: *
654: TEMP = RWORK(IU1CS+I-1)*B11E(I) + RWORK(IU1SN+I-1)*B11D(I+1)
655: B11D(I+1) = RWORK(IU1CS+I-1)*B11D(I+1) -
656: $ RWORK(IU1SN+I-1)*B11E(I)
657: B11E(I) = TEMP
658: IF( I .LT. IMAX - 1 ) THEN
659: B11BULGE = RWORK(IU1SN+I-1)*B11E(I+1)
660: B11E(I+1) = RWORK(IU1CS+I-1)*B11E(I+1)
661: END IF
662: TEMP = RWORK(IU2CS+I-1)*B21E(I) + RWORK(IU2SN+I-1)*B21D(I+1)
663: B21D(I+1) = RWORK(IU2CS+I-1)*B21D(I+1) -
664: $ RWORK(IU2SN+I-1)*B21E(I)
665: B21E(I) = TEMP
666: IF( I .LT. IMAX - 1 ) THEN
667: B21BULGE = RWORK(IU2SN+I-1)*B21E(I+1)
668: B21E(I+1) = RWORK(IU2CS+I-1)*B21E(I+1)
669: END IF
670: TEMP = RWORK(IU1CS+I-1)*B12D(I) + RWORK(IU1SN+I-1)*B12E(I)
671: B12E(I) = RWORK(IU1CS+I-1)*B12E(I) -
672: $ RWORK(IU1SN+I-1)*B12D(I)
673: B12D(I) = TEMP
674: B12BULGE = RWORK(IU1SN+I-1)*B12D(I+1)
675: B12D(I+1) = RWORK(IU1CS+I-1)*B12D(I+1)
676: TEMP = RWORK(IU2CS+I-1)*B22D(I) + RWORK(IU2SN+I-1)*B22E(I)
677: B22E(I) = RWORK(IU2CS+I-1)*B22E(I) -
678: $ RWORK(IU2SN+I-1)*B22D(I)
679: B22D(I) = TEMP
680: B22BULGE = RWORK(IU2SN+I-1)*B22D(I+1)
681: B22D(I+1) = RWORK(IU2CS+I-1)*B22D(I+1)
682: *
683: END DO
684: *
685: * Compute PHI(IMAX-1)
686: *
687: X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) +
688: $ COS(THETA(IMAX-1))*B21E(IMAX-1)
689: Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) +
690: $ COS(THETA(IMAX-1))*B22D(IMAX-1)
691: Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE
692: *
693: PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) )
694: *
695: * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
696: *
697: RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
698: RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
699: *
700: IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
701: CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+IMAX-1-1),
702: $ RWORK(IV2TCS+IMAX-1-1), R )
703: ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
704: CALL DLARTGP( B12BULGE, B12D(IMAX-1),
705: $ RWORK(IV2TSN+IMAX-1-1),
706: $ RWORK(IV2TCS+IMAX-1-1), R )
707: ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
708: CALL DLARTGP( B22BULGE, B22D(IMAX-1),
709: $ RWORK(IV2TSN+IMAX-1-1),
710: $ RWORK(IV2TCS+IMAX-1-1), R )
711: ELSE IF( NU .LT. MU ) THEN
712: CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
713: $ RWORK(IV2TCS+IMAX-1-1),
714: $ RWORK(IV2TSN+IMAX-1-1) )
715: ELSE
716: CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
717: $ RWORK(IV2TCS+IMAX-1-1),
718: $ RWORK(IV2TSN+IMAX-1-1) )
719: END IF
720: *
721: TEMP = RWORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) +
722: $ RWORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
723: B12D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
724: $ RWORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
725: B12E(IMAX-1) = TEMP
726: TEMP = RWORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) +
727: $ RWORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
728: B22D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
729: $ RWORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
730: B22E(IMAX-1) = TEMP
731: *
732: * Update singular vectors
733: *
734: IF( WANTU1 ) THEN
735: IF( COLMAJOR ) THEN
736: CALL ZLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
737: $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
738: $ U1(1,IMIN), LDU1 )
739: ELSE
740: CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
741: $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
742: $ U1(IMIN,1), LDU1 )
743: END IF
744: END IF
745: IF( WANTU2 ) THEN
746: IF( COLMAJOR ) THEN
747: CALL ZLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
748: $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
749: $ U2(1,IMIN), LDU2 )
750: ELSE
751: CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
752: $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
753: $ U2(IMIN,1), LDU2 )
754: END IF
755: END IF
756: IF( WANTV1T ) THEN
757: IF( COLMAJOR ) THEN
758: CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
759: $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
760: $ V1T(IMIN,1), LDV1T )
761: ELSE
762: CALL ZLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
763: $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
764: $ V1T(1,IMIN), LDV1T )
765: END IF
766: END IF
767: IF( WANTV2T ) THEN
768: IF( COLMAJOR ) THEN
769: CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
770: $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
771: $ V2T(IMIN,1), LDV2T )
772: ELSE
773: CALL ZLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
774: $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
775: $ V2T(1,IMIN), LDV2T )
776: END IF
777: END IF
778: *
779: * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
780: *
781: IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN
782: B11D(IMAX) = -B11D(IMAX)
783: B21D(IMAX) = -B21D(IMAX)
784: IF( WANTV1T ) THEN
785: IF( COLMAJOR ) THEN
786: CALL ZSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T )
787: ELSE
788: CALL ZSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 )
789: END IF
790: END IF
791: END IF
792: *
793: * Compute THETA(IMAX)
794: *
795: X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
796: $ SIN(PHI(IMAX-1))*B12E(IMAX-1)
797: Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
798: $ SIN(PHI(IMAX-1))*B22E(IMAX-1)
799: *
800: THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
801: *
802: * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
803: * and B22(IMAX,IMAX-1)
804: *
805: IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN
806: B12D(IMAX) = -B12D(IMAX)
807: IF( WANTU1 ) THEN
808: IF( COLMAJOR ) THEN
809: CALL ZSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 )
810: ELSE
811: CALL ZSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 )
812: END IF
813: END IF
814: END IF
815: IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN
816: B22D(IMAX) = -B22D(IMAX)
817: IF( WANTU2 ) THEN
818: IF( COLMAJOR ) THEN
819: CALL ZSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 )
820: ELSE
821: CALL ZSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 )
822: END IF
823: END IF
824: END IF
825: *
826: * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
827: *
828: IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN
829: IF( WANTV2T ) THEN
830: IF( COLMAJOR ) THEN
831: CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T )
832: ELSE
833: CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 )
834: END IF
835: END IF
836: END IF
837: *
838: * Test for negligible sines or cosines
839: *
840: DO I = IMIN, IMAX
841: IF( THETA(I) .LT. THRESH ) THEN
842: THETA(I) = ZERO
843: ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
844: THETA(I) = PIOVER2
845: END IF
846: END DO
847: DO I = IMIN, IMAX-1
848: IF( PHI(I) .LT. THRESH ) THEN
849: PHI(I) = ZERO
850: ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
851: PHI(I) = PIOVER2
852: END IF
853: END DO
854: *
855: * Deflate
856: *
857: IF (IMAX .GT. 1) THEN
858: DO WHILE( PHI(IMAX-1) .EQ. ZERO )
859: IMAX = IMAX - 1
860: IF (IMAX .LE. 1) EXIT
861: END DO
862: END IF
863: IF( IMIN .GT. IMAX - 1 )
864: $ IMIN = IMAX - 1
865: IF (IMIN .GT. 1) THEN
866: DO WHILE (PHI(IMIN-1) .NE. ZERO)
867: IMIN = IMIN - 1
868: IF (IMIN .LE. 1) EXIT
869: END DO
870: END IF
871: *
872: * Repeat main iteration loop
873: *
874: END DO
875: *
876: * Postprocessing: order THETA from least to greatest
877: *
878: DO I = 1, Q
879: *
880: MINI = I
881: THETAMIN = THETA(I)
882: DO J = I+1, Q
883: IF( THETA(J) .LT. THETAMIN ) THEN
884: MINI = J
885: THETAMIN = THETA(J)
886: END IF
887: END DO
888: *
889: IF( MINI .NE. I ) THEN
890: THETA(MINI) = THETA(I)
891: THETA(I) = THETAMIN
892: IF( COLMAJOR ) THEN
893: IF( WANTU1 )
894: $ CALL ZSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
895: IF( WANTU2 )
896: $ CALL ZSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
897: IF( WANTV1T )
898: $ CALL ZSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
899: IF( WANTV2T )
900: $ CALL ZSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
901: $ LDV2T )
902: ELSE
903: IF( WANTU1 )
904: $ CALL ZSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
905: IF( WANTU2 )
906: $ CALL ZSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
907: IF( WANTV1T )
908: $ CALL ZSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
909: IF( WANTV2T )
910: $ CALL ZSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )
911: END IF
912: END IF
913: *
914: END DO
915: *
916: RETURN
917: *
918: * End of ZBBCSD
919: *
920: END
921:
CVSweb interface <joel.bertrand@systella.fr>