Annotation of rpl/lapack/lapack/dgesvd.f, revision 1.2
1.1 bertrand 1: SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2: $ WORK, LWORK, INFO )
3: *
4: * -- LAPACK driver routine (version 3.2) --
5: * -- LAPACK is a software package provided by Univ. of Tennessee, --
6: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7: * November 2006
8: *
9: * .. Scalar Arguments ..
10: CHARACTER JOBU, JOBVT
11: INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
12: * ..
13: * .. Array Arguments ..
14: DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
15: $ VT( LDVT, * ), WORK( * )
16: * ..
17: *
18: * Purpose
19: * =======
20: *
21: * DGESVD computes the singular value decomposition (SVD) of a real
22: * M-by-N matrix A, optionally computing the left and/or right singular
23: * vectors. The SVD is written
24: *
25: * A = U * SIGMA * transpose(V)
26: *
27: * where SIGMA is an M-by-N matrix which is zero except for its
28: * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
29: * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
30: * are the singular values of A; they are real and non-negative, and
31: * are returned in descending order. The first min(m,n) columns of
32: * U and V are the left and right singular vectors of A.
33: *
34: * Note that the routine returns V**T, not V.
35: *
36: * Arguments
37: * =========
38: *
39: * JOBU (input) CHARACTER*1
40: * Specifies options for computing all or part of the matrix U:
41: * = 'A': all M columns of U are returned in array U:
42: * = 'S': the first min(m,n) columns of U (the left singular
43: * vectors) are returned in the array U;
44: * = 'O': the first min(m,n) columns of U (the left singular
45: * vectors) are overwritten on the array A;
46: * = 'N': no columns of U (no left singular vectors) are
47: * computed.
48: *
49: * JOBVT (input) CHARACTER*1
50: * Specifies options for computing all or part of the matrix
51: * V**T:
52: * = 'A': all N rows of V**T are returned in the array VT;
53: * = 'S': the first min(m,n) rows of V**T (the right singular
54: * vectors) are returned in the array VT;
55: * = 'O': the first min(m,n) rows of V**T (the right singular
56: * vectors) are overwritten on the array A;
57: * = 'N': no rows of V**T (no right singular vectors) are
58: * computed.
59: *
60: * JOBVT and JOBU cannot both be 'O'.
61: *
62: * M (input) INTEGER
63: * The number of rows of the input matrix A. M >= 0.
64: *
65: * N (input) INTEGER
66: * The number of columns of the input matrix A. N >= 0.
67: *
68: * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69: * On entry, the M-by-N matrix A.
70: * On exit,
71: * if JOBU = 'O', A is overwritten with the first min(m,n)
72: * columns of U (the left singular vectors,
73: * stored columnwise);
74: * if JOBVT = 'O', A is overwritten with the first min(m,n)
75: * rows of V**T (the right singular vectors,
76: * stored rowwise);
77: * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
78: * are destroyed.
79: *
80: * LDA (input) INTEGER
81: * The leading dimension of the array A. LDA >= max(1,M).
82: *
83: * S (output) DOUBLE PRECISION array, dimension (min(M,N))
84: * The singular values of A, sorted so that S(i) >= S(i+1).
85: *
86: * U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
87: * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
88: * If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
89: * if JOBU = 'S', U contains the first min(m,n) columns of U
90: * (the left singular vectors, stored columnwise);
91: * if JOBU = 'N' or 'O', U is not referenced.
92: *
93: * LDU (input) INTEGER
94: * The leading dimension of the array U. LDU >= 1; if
95: * JOBU = 'S' or 'A', LDU >= M.
96: *
97: * VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
98: * If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
99: * V**T;
100: * if JOBVT = 'S', VT contains the first min(m,n) rows of
101: * V**T (the right singular vectors, stored rowwise);
102: * if JOBVT = 'N' or 'O', VT is not referenced.
103: *
104: * LDVT (input) INTEGER
105: * The leading dimension of the array VT. LDVT >= 1; if
106: * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
107: *
108: * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
109: * On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
110: * if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
111: * superdiagonal elements of an upper bidiagonal matrix B
112: * whose diagonal is in S (not necessarily sorted). B
113: * satisfies A = U * B * VT, so it has the same singular values
114: * as A, and singular vectors related by U and VT.
115: *
116: * LWORK (input) INTEGER
117: * The dimension of the array WORK.
118: * LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
119: * For good performance, LWORK should generally be larger.
120: *
121: * If LWORK = -1, then a workspace query is assumed; the routine
122: * only calculates the optimal size of the WORK array, returns
123: * this value as the first entry of the WORK array, and no error
124: * message related to LWORK is issued by XERBLA.
125: *
126: * INFO (output) INTEGER
127: * = 0: successful exit.
128: * < 0: if INFO = -i, the i-th argument had an illegal value.
129: * > 0: if DBDSQR did not converge, INFO specifies how many
130: * superdiagonals of an intermediate bidiagonal form B
131: * did not converge to zero. See the description of WORK
132: * above for details.
133: *
134: * =====================================================================
135: *
136: * .. Parameters ..
137: DOUBLE PRECISION ZERO, ONE
138: PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
139: * ..
140: * .. Local Scalars ..
141: LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
142: $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
143: INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
144: $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
145: $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
146: $ NRVT, WRKBL
147: DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
148: * ..
149: * .. Local Arrays ..
150: DOUBLE PRECISION DUM( 1 )
151: * ..
152: * .. External Subroutines ..
153: EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
154: $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
155: $ XERBLA
156: * ..
157: * .. External Functions ..
158: LOGICAL LSAME
159: INTEGER ILAENV
160: DOUBLE PRECISION DLAMCH, DLANGE
161: EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
162: * ..
163: * .. Intrinsic Functions ..
164: INTRINSIC MAX, MIN, SQRT
165: * ..
166: * .. Executable Statements ..
167: *
168: * Test the input arguments
169: *
170: INFO = 0
171: MINMN = MIN( M, N )
172: WNTUA = LSAME( JOBU, 'A' )
173: WNTUS = LSAME( JOBU, 'S' )
174: WNTUAS = WNTUA .OR. WNTUS
175: WNTUO = LSAME( JOBU, 'O' )
176: WNTUN = LSAME( JOBU, 'N' )
177: WNTVA = LSAME( JOBVT, 'A' )
178: WNTVS = LSAME( JOBVT, 'S' )
179: WNTVAS = WNTVA .OR. WNTVS
180: WNTVO = LSAME( JOBVT, 'O' )
181: WNTVN = LSAME( JOBVT, 'N' )
182: LQUERY = ( LWORK.EQ.-1 )
183: *
184: IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
185: INFO = -1
186: ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
187: $ ( WNTVO .AND. WNTUO ) ) THEN
188: INFO = -2
189: ELSE IF( M.LT.0 ) THEN
190: INFO = -3
191: ELSE IF( N.LT.0 ) THEN
192: INFO = -4
193: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
194: INFO = -6
195: ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
196: INFO = -9
197: ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
198: $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
199: INFO = -11
200: END IF
201: *
202: * Compute workspace
203: * (Note: Comments in the code beginning "Workspace:" describe the
204: * minimal amount of workspace needed at that point in the code,
205: * as well as the preferred amount for good performance.
206: * NB refers to the optimal block size for the immediately
207: * following subroutine, as returned by ILAENV.)
208: *
209: IF( INFO.EQ.0 ) THEN
210: MINWRK = 1
211: MAXWRK = 1
212: IF( M.GE.N .AND. MINMN.GT.0 ) THEN
213: *
214: * Compute space needed for DBDSQR
215: *
216: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
217: BDSPAC = 5*N
218: IF( M.GE.MNTHR ) THEN
219: IF( WNTUN ) THEN
220: *
221: * Path 1 (M much larger than N, JOBU='N')
222: *
223: MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
224: $ -1 )
225: MAXWRK = MAX( MAXWRK, 3*N+2*N*
226: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
227: IF( WNTVO .OR. WNTVAS )
228: $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
229: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
230: MAXWRK = MAX( MAXWRK, BDSPAC )
231: MINWRK = MAX( 4*N, BDSPAC )
232: ELSE IF( WNTUO .AND. WNTVN ) THEN
233: *
234: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
235: *
236: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
237: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
238: $ N, N, -1 ) )
239: WRKBL = MAX( WRKBL, 3*N+2*N*
240: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
241: WRKBL = MAX( WRKBL, 3*N+N*
242: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
243: WRKBL = MAX( WRKBL, BDSPAC )
244: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
245: MINWRK = MAX( 3*N+M, BDSPAC )
246: ELSE IF( WNTUO .AND. WNTVAS ) THEN
247: *
248: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
249: * 'A')
250: *
251: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
252: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
253: $ N, N, -1 ) )
254: WRKBL = MAX( WRKBL, 3*N+2*N*
255: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
256: WRKBL = MAX( WRKBL, 3*N+N*
257: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
258: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
259: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
260: WRKBL = MAX( WRKBL, BDSPAC )
261: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
262: MINWRK = MAX( 3*N+M, BDSPAC )
263: ELSE IF( WNTUS .AND. WNTVN ) THEN
264: *
265: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
266: *
267: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
268: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
269: $ N, N, -1 ) )
270: WRKBL = MAX( WRKBL, 3*N+2*N*
271: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
272: WRKBL = MAX( WRKBL, 3*N+N*
273: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
274: WRKBL = MAX( WRKBL, BDSPAC )
275: MAXWRK = N*N + WRKBL
276: MINWRK = MAX( 3*N+M, BDSPAC )
277: ELSE IF( WNTUS .AND. WNTVO ) THEN
278: *
279: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
280: *
281: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
282: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
283: $ N, N, -1 ) )
284: WRKBL = MAX( WRKBL, 3*N+2*N*
285: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
286: WRKBL = MAX( WRKBL, 3*N+N*
287: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
288: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
289: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
290: WRKBL = MAX( WRKBL, BDSPAC )
291: MAXWRK = 2*N*N + WRKBL
292: MINWRK = MAX( 3*N+M, BDSPAC )
293: ELSE IF( WNTUS .AND. WNTVAS ) THEN
294: *
295: * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
296: * 'A')
297: *
298: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
299: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
300: $ N, N, -1 ) )
301: WRKBL = MAX( WRKBL, 3*N+2*N*
302: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
303: WRKBL = MAX( WRKBL, 3*N+N*
304: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
305: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
306: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
307: WRKBL = MAX( WRKBL, BDSPAC )
308: MAXWRK = N*N + WRKBL
309: MINWRK = MAX( 3*N+M, BDSPAC )
310: ELSE IF( WNTUA .AND. WNTVN ) THEN
311: *
312: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
313: *
314: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
315: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
316: $ M, N, -1 ) )
317: WRKBL = MAX( WRKBL, 3*N+2*N*
318: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
319: WRKBL = MAX( WRKBL, 3*N+N*
320: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
321: WRKBL = MAX( WRKBL, BDSPAC )
322: MAXWRK = N*N + WRKBL
323: MINWRK = MAX( 3*N+M, BDSPAC )
324: ELSE IF( WNTUA .AND. WNTVO ) THEN
325: *
326: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
327: *
328: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
329: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
330: $ M, N, -1 ) )
331: WRKBL = MAX( WRKBL, 3*N+2*N*
332: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
333: WRKBL = MAX( WRKBL, 3*N+N*
334: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
335: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
336: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
337: WRKBL = MAX( WRKBL, BDSPAC )
338: MAXWRK = 2*N*N + WRKBL
339: MINWRK = MAX( 3*N+M, BDSPAC )
340: ELSE IF( WNTUA .AND. WNTVAS ) THEN
341: *
342: * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
343: * 'A')
344: *
345: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
346: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
347: $ M, N, -1 ) )
348: WRKBL = MAX( WRKBL, 3*N+2*N*
349: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
350: WRKBL = MAX( WRKBL, 3*N+N*
351: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
352: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
353: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
354: WRKBL = MAX( WRKBL, BDSPAC )
355: MAXWRK = N*N + WRKBL
356: MINWRK = MAX( 3*N+M, BDSPAC )
357: END IF
358: ELSE
359: *
360: * Path 10 (M at least N, but not much larger)
361: *
362: MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
363: $ -1, -1 )
364: IF( WNTUS .OR. WNTUO )
365: $ MAXWRK = MAX( MAXWRK, 3*N+N*
366: $ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
367: IF( WNTUA )
368: $ MAXWRK = MAX( MAXWRK, 3*N+M*
369: $ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
370: IF( .NOT.WNTVN )
371: $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
372: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
373: MAXWRK = MAX( MAXWRK, BDSPAC )
374: MINWRK = MAX( 3*N+M, BDSPAC )
375: END IF
376: ELSE IF( MINMN.GT.0 ) THEN
377: *
378: * Compute space needed for DBDSQR
379: *
380: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
381: BDSPAC = 5*M
382: IF( N.GE.MNTHR ) THEN
383: IF( WNTVN ) THEN
384: *
385: * Path 1t(N much larger than M, JOBVT='N')
386: *
387: MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
388: $ -1 )
389: MAXWRK = MAX( MAXWRK, 3*M+2*M*
390: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
391: IF( WNTUO .OR. WNTUAS )
392: $ MAXWRK = MAX( MAXWRK, 3*M+M*
393: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
394: MAXWRK = MAX( MAXWRK, BDSPAC )
395: MINWRK = MAX( 4*M, BDSPAC )
396: ELSE IF( WNTVO .AND. WNTUN ) THEN
397: *
398: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
399: *
400: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
401: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
402: $ N, M, -1 ) )
403: WRKBL = MAX( WRKBL, 3*M+2*M*
404: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
405: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
406: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
407: WRKBL = MAX( WRKBL, BDSPAC )
408: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
409: MINWRK = MAX( 3*M+N, BDSPAC )
410: ELSE IF( WNTVO .AND. WNTUAS ) THEN
411: *
412: * Path 3t(N much larger than M, JOBU='S' or 'A',
413: * JOBVT='O')
414: *
415: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
416: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
417: $ N, M, -1 ) )
418: WRKBL = MAX( WRKBL, 3*M+2*M*
419: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
420: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
421: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
422: WRKBL = MAX( WRKBL, 3*M+M*
423: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
424: WRKBL = MAX( WRKBL, BDSPAC )
425: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
426: MINWRK = MAX( 3*M+N, BDSPAC )
427: ELSE IF( WNTVS .AND. WNTUN ) THEN
428: *
429: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
430: *
431: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
432: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
433: $ N, M, -1 ) )
434: WRKBL = MAX( WRKBL, 3*M+2*M*
435: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
436: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
437: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
438: WRKBL = MAX( WRKBL, BDSPAC )
439: MAXWRK = M*M + WRKBL
440: MINWRK = MAX( 3*M+N, BDSPAC )
441: ELSE IF( WNTVS .AND. WNTUO ) THEN
442: *
443: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
444: *
445: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
446: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
447: $ N, M, -1 ) )
448: WRKBL = MAX( WRKBL, 3*M+2*M*
449: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
450: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
451: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
452: WRKBL = MAX( WRKBL, 3*M+M*
453: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
454: WRKBL = MAX( WRKBL, BDSPAC )
455: MAXWRK = 2*M*M + WRKBL
456: MINWRK = MAX( 3*M+N, BDSPAC )
457: ELSE IF( WNTVS .AND. WNTUAS ) THEN
458: *
459: * Path 6t(N much larger than M, JOBU='S' or 'A',
460: * JOBVT='S')
461: *
462: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
463: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
464: $ N, M, -1 ) )
465: WRKBL = MAX( WRKBL, 3*M+2*M*
466: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
467: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
468: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
469: WRKBL = MAX( WRKBL, 3*M+M*
470: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
471: WRKBL = MAX( WRKBL, BDSPAC )
472: MAXWRK = M*M + WRKBL
473: MINWRK = MAX( 3*M+N, BDSPAC )
474: ELSE IF( WNTVA .AND. WNTUN ) THEN
475: *
476: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
477: *
478: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
479: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
480: $ N, M, -1 ) )
481: WRKBL = MAX( WRKBL, 3*M+2*M*
482: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
483: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
484: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
485: WRKBL = MAX( WRKBL, BDSPAC )
486: MAXWRK = M*M + WRKBL
487: MINWRK = MAX( 3*M+N, BDSPAC )
488: ELSE IF( WNTVA .AND. WNTUO ) THEN
489: *
490: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
491: *
492: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
493: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
494: $ N, M, -1 ) )
495: WRKBL = MAX( WRKBL, 3*M+2*M*
496: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
497: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
498: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
499: WRKBL = MAX( WRKBL, 3*M+M*
500: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
501: WRKBL = MAX( WRKBL, BDSPAC )
502: MAXWRK = 2*M*M + WRKBL
503: MINWRK = MAX( 3*M+N, BDSPAC )
504: ELSE IF( WNTVA .AND. WNTUAS ) THEN
505: *
506: * Path 9t(N much larger than M, JOBU='S' or 'A',
507: * JOBVT='A')
508: *
509: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
510: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
511: $ N, M, -1 ) )
512: WRKBL = MAX( WRKBL, 3*M+2*M*
513: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
514: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
515: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
516: WRKBL = MAX( WRKBL, 3*M+M*
517: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
518: WRKBL = MAX( WRKBL, BDSPAC )
519: MAXWRK = M*M + WRKBL
520: MINWRK = MAX( 3*M+N, BDSPAC )
521: END IF
522: ELSE
523: *
524: * Path 10t(N greater than M, but not much larger)
525: *
526: MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
527: $ -1, -1 )
528: IF( WNTVS .OR. WNTVO )
529: $ MAXWRK = MAX( MAXWRK, 3*M+M*
530: $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
531: IF( WNTVA )
532: $ MAXWRK = MAX( MAXWRK, 3*M+N*
533: $ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
534: IF( .NOT.WNTUN )
535: $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
536: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
537: MAXWRK = MAX( MAXWRK, BDSPAC )
538: MINWRK = MAX( 3*M+N, BDSPAC )
539: END IF
540: END IF
541: MAXWRK = MAX( MAXWRK, MINWRK )
542: WORK( 1 ) = MAXWRK
543: *
544: IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
545: INFO = -13
546: END IF
547: END IF
548: *
549: IF( INFO.NE.0 ) THEN
550: CALL XERBLA( 'DGESVD', -INFO )
551: RETURN
552: ELSE IF( LQUERY ) THEN
553: RETURN
554: END IF
555: *
556: * Quick return if possible
557: *
558: IF( M.EQ.0 .OR. N.EQ.0 ) THEN
559: RETURN
560: END IF
561: *
562: * Get machine constants
563: *
564: EPS = DLAMCH( 'P' )
565: SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
566: BIGNUM = ONE / SMLNUM
567: *
568: * Scale A if max element outside range [SMLNUM,BIGNUM]
569: *
570: ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
571: ISCL = 0
572: IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
573: ISCL = 1
574: CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
575: ELSE IF( ANRM.GT.BIGNUM ) THEN
576: ISCL = 1
577: CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
578: END IF
579: *
580: IF( M.GE.N ) THEN
581: *
582: * A has at least as many rows as columns. If A has sufficiently
583: * more rows than columns, first reduce using the QR
584: * decomposition (if sufficient workspace available)
585: *
586: IF( M.GE.MNTHR ) THEN
587: *
588: IF( WNTUN ) THEN
589: *
590: * Path 1 (M much larger than N, JOBU='N')
591: * No left singular vectors to be computed
592: *
593: ITAU = 1
594: IWORK = ITAU + N
595: *
596: * Compute A=Q*R
597: * (Workspace: need 2*N, prefer N+N*NB)
598: *
599: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
600: $ LWORK-IWORK+1, IERR )
601: *
602: * Zero out below R
603: *
604: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
605: IE = 1
606: ITAUQ = IE + N
607: ITAUP = ITAUQ + N
608: IWORK = ITAUP + N
609: *
610: * Bidiagonalize R in A
611: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
612: *
613: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
614: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
615: $ IERR )
616: NCVT = 0
617: IF( WNTVO .OR. WNTVAS ) THEN
618: *
619: * If right singular vectors desired, generate P'.
620: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
621: *
622: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
623: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
624: NCVT = N
625: END IF
626: IWORK = IE + N
627: *
628: * Perform bidiagonal QR iteration, computing right
629: * singular vectors of A in A if desired
630: * (Workspace: need BDSPAC)
631: *
632: CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
633: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
634: *
635: * If right singular vectors desired in VT, copy them there
636: *
637: IF( WNTVAS )
638: $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
639: *
640: ELSE IF( WNTUO .AND. WNTVN ) THEN
641: *
642: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
643: * N left singular vectors to be overwritten on A and
644: * no right singular vectors to be computed
645: *
646: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
647: *
648: * Sufficient workspace for a fast algorithm
649: *
650: IR = 1
651: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
652: *
653: * WORK(IU) is LDA by N, WORK(IR) is LDA by N
654: *
655: LDWRKU = LDA
656: LDWRKR = LDA
657: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
658: *
659: * WORK(IU) is LDA by N, WORK(IR) is N by N
660: *
661: LDWRKU = LDA
662: LDWRKR = N
663: ELSE
664: *
665: * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
666: *
667: LDWRKU = ( LWORK-N*N-N ) / N
668: LDWRKR = N
669: END IF
670: ITAU = IR + LDWRKR*N
671: IWORK = ITAU + N
672: *
673: * Compute A=Q*R
674: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
675: *
676: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
677: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
678: *
679: * Copy R to WORK(IR) and zero out below it
680: *
681: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
682: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
683: $ LDWRKR )
684: *
685: * Generate Q in A
686: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
687: *
688: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
689: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
690: IE = ITAU
691: ITAUQ = IE + N
692: ITAUP = ITAUQ + N
693: IWORK = ITAUP + N
694: *
695: * Bidiagonalize R in WORK(IR)
696: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
697: *
698: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
699: $ WORK( ITAUQ ), WORK( ITAUP ),
700: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
701: *
702: * Generate left vectors bidiagonalizing R
703: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
704: *
705: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
706: $ WORK( ITAUQ ), WORK( IWORK ),
707: $ LWORK-IWORK+1, IERR )
708: IWORK = IE + N
709: *
710: * Perform bidiagonal QR iteration, computing left
711: * singular vectors of R in WORK(IR)
712: * (Workspace: need N*N+BDSPAC)
713: *
714: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
715: $ WORK( IR ), LDWRKR, DUM, 1,
716: $ WORK( IWORK ), INFO )
717: IU = IE + N
718: *
719: * Multiply Q in A by left singular vectors of R in
720: * WORK(IR), storing result in WORK(IU) and copying to A
721: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
722: *
723: DO 10 I = 1, M, LDWRKU
724: CHUNK = MIN( M-I+1, LDWRKU )
725: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
726: $ LDA, WORK( IR ), LDWRKR, ZERO,
727: $ WORK( IU ), LDWRKU )
728: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
729: $ A( I, 1 ), LDA )
730: 10 CONTINUE
731: *
732: ELSE
733: *
734: * Insufficient workspace for a fast algorithm
735: *
736: IE = 1
737: ITAUQ = IE + N
738: ITAUP = ITAUQ + N
739: IWORK = ITAUP + N
740: *
741: * Bidiagonalize A
742: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
743: *
744: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
745: $ WORK( ITAUQ ), WORK( ITAUP ),
746: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
747: *
748: * Generate left vectors bidiagonalizing A
749: * (Workspace: need 4*N, prefer 3*N+N*NB)
750: *
751: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
752: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
753: IWORK = IE + N
754: *
755: * Perform bidiagonal QR iteration, computing left
756: * singular vectors of A in A
757: * (Workspace: need BDSPAC)
758: *
759: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
760: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
761: *
762: END IF
763: *
764: ELSE IF( WNTUO .AND. WNTVAS ) THEN
765: *
766: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
767: * N left singular vectors to be overwritten on A and
768: * N right singular vectors to be computed in VT
769: *
770: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
771: *
772: * Sufficient workspace for a fast algorithm
773: *
774: IR = 1
775: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
776: *
777: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
778: *
779: LDWRKU = LDA
780: LDWRKR = LDA
781: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
782: *
783: * WORK(IU) is LDA by N and WORK(IR) is N by N
784: *
785: LDWRKU = LDA
786: LDWRKR = N
787: ELSE
788: *
789: * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
790: *
791: LDWRKU = ( LWORK-N*N-N ) / N
792: LDWRKR = N
793: END IF
794: ITAU = IR + LDWRKR*N
795: IWORK = ITAU + N
796: *
797: * Compute A=Q*R
798: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
799: *
800: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
801: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
802: *
803: * Copy R to VT, zeroing out below it
804: *
805: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
806: IF( N.GT.1 )
807: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
808: $ VT( 2, 1 ), LDVT )
809: *
810: * Generate Q in A
811: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
812: *
813: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
814: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
815: IE = ITAU
816: ITAUQ = IE + N
817: ITAUP = ITAUQ + N
818: IWORK = ITAUP + N
819: *
820: * Bidiagonalize R in VT, copying result to WORK(IR)
821: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
822: *
823: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
824: $ WORK( ITAUQ ), WORK( ITAUP ),
825: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
826: CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
827: *
828: * Generate left vectors bidiagonalizing R in WORK(IR)
829: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
830: *
831: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
832: $ WORK( ITAUQ ), WORK( IWORK ),
833: $ LWORK-IWORK+1, IERR )
834: *
835: * Generate right vectors bidiagonalizing R in VT
836: * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
837: *
838: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
839: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
840: IWORK = IE + N
841: *
842: * Perform bidiagonal QR iteration, computing left
843: * singular vectors of R in WORK(IR) and computing right
844: * singular vectors of R in VT
845: * (Workspace: need N*N+BDSPAC)
846: *
847: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
848: $ WORK( IR ), LDWRKR, DUM, 1,
849: $ WORK( IWORK ), INFO )
850: IU = IE + N
851: *
852: * Multiply Q in A by left singular vectors of R in
853: * WORK(IR), storing result in WORK(IU) and copying to A
854: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
855: *
856: DO 20 I = 1, M, LDWRKU
857: CHUNK = MIN( M-I+1, LDWRKU )
858: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
859: $ LDA, WORK( IR ), LDWRKR, ZERO,
860: $ WORK( IU ), LDWRKU )
861: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
862: $ A( I, 1 ), LDA )
863: 20 CONTINUE
864: *
865: ELSE
866: *
867: * Insufficient workspace for a fast algorithm
868: *
869: ITAU = 1
870: IWORK = ITAU + N
871: *
872: * Compute A=Q*R
873: * (Workspace: need 2*N, prefer N+N*NB)
874: *
875: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
876: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
877: *
878: * Copy R to VT, zeroing out below it
879: *
880: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
881: IF( N.GT.1 )
882: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
883: $ VT( 2, 1 ), LDVT )
884: *
885: * Generate Q in A
886: * (Workspace: need 2*N, prefer N+N*NB)
887: *
888: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
889: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
890: IE = ITAU
891: ITAUQ = IE + N
892: ITAUP = ITAUQ + N
893: IWORK = ITAUP + N
894: *
895: * Bidiagonalize R in VT
896: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
897: *
898: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
899: $ WORK( ITAUQ ), WORK( ITAUP ),
900: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
901: *
902: * Multiply Q in A by left vectors bidiagonalizing R
903: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
904: *
905: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
906: $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
907: $ LWORK-IWORK+1, IERR )
908: *
909: * Generate right vectors bidiagonalizing R in VT
910: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
911: *
912: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
913: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
914: IWORK = IE + N
915: *
916: * Perform bidiagonal QR iteration, computing left
917: * singular vectors of A in A and computing right
918: * singular vectors of A in VT
919: * (Workspace: need BDSPAC)
920: *
921: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
922: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
923: *
924: END IF
925: *
926: ELSE IF( WNTUS ) THEN
927: *
928: IF( WNTVN ) THEN
929: *
930: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
931: * N left singular vectors to be computed in U and
932: * no right singular vectors to be computed
933: *
934: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
935: *
936: * Sufficient workspace for a fast algorithm
937: *
938: IR = 1
939: IF( LWORK.GE.WRKBL+LDA*N ) THEN
940: *
941: * WORK(IR) is LDA by N
942: *
943: LDWRKR = LDA
944: ELSE
945: *
946: * WORK(IR) is N by N
947: *
948: LDWRKR = N
949: END IF
950: ITAU = IR + LDWRKR*N
951: IWORK = ITAU + N
952: *
953: * Compute A=Q*R
954: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
955: *
956: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
957: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
958: *
959: * Copy R to WORK(IR), zeroing out below it
960: *
961: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
962: $ LDWRKR )
963: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
964: $ WORK( IR+1 ), LDWRKR )
965: *
966: * Generate Q in A
967: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
968: *
969: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
970: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
971: IE = ITAU
972: ITAUQ = IE + N
973: ITAUP = ITAUQ + N
974: IWORK = ITAUP + N
975: *
976: * Bidiagonalize R in WORK(IR)
977: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
978: *
979: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
980: $ WORK( IE ), WORK( ITAUQ ),
981: $ WORK( ITAUP ), WORK( IWORK ),
982: $ LWORK-IWORK+1, IERR )
983: *
984: * Generate left vectors bidiagonalizing R in WORK(IR)
985: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
986: *
987: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
988: $ WORK( ITAUQ ), WORK( IWORK ),
989: $ LWORK-IWORK+1, IERR )
990: IWORK = IE + N
991: *
992: * Perform bidiagonal QR iteration, computing left
993: * singular vectors of R in WORK(IR)
994: * (Workspace: need N*N+BDSPAC)
995: *
996: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
997: $ 1, WORK( IR ), LDWRKR, DUM, 1,
998: $ WORK( IWORK ), INFO )
999: *
1000: * Multiply Q in A by left singular vectors of R in
1001: * WORK(IR), storing result in U
1002: * (Workspace: need N*N)
1003: *
1004: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1005: $ WORK( IR ), LDWRKR, ZERO, U, LDU )
1006: *
1007: ELSE
1008: *
1009: * Insufficient workspace for a fast algorithm
1010: *
1011: ITAU = 1
1012: IWORK = ITAU + N
1013: *
1014: * Compute A=Q*R, copying result to U
1015: * (Workspace: need 2*N, prefer N+N*NB)
1016: *
1017: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1018: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1019: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1020: *
1021: * Generate Q in U
1022: * (Workspace: need 2*N, prefer N+N*NB)
1023: *
1024: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1025: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1026: IE = ITAU
1027: ITAUQ = IE + N
1028: ITAUP = ITAUQ + N
1029: IWORK = ITAUP + N
1030: *
1031: * Zero out below R in A
1032: *
1033: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1034: $ LDA )
1035: *
1036: * Bidiagonalize R in A
1037: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1038: *
1039: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1040: $ WORK( ITAUQ ), WORK( ITAUP ),
1041: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1042: *
1043: * Multiply Q in U by left vectors bidiagonalizing R
1044: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1045: *
1046: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1047: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1048: $ LWORK-IWORK+1, IERR )
1049: IWORK = IE + N
1050: *
1051: * Perform bidiagonal QR iteration, computing left
1052: * singular vectors of A in U
1053: * (Workspace: need BDSPAC)
1054: *
1055: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1056: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1057: $ INFO )
1058: *
1059: END IF
1060: *
1061: ELSE IF( WNTVO ) THEN
1062: *
1063: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1064: * N left singular vectors to be computed in U and
1065: * N right singular vectors to be overwritten on A
1066: *
1067: IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1068: *
1069: * Sufficient workspace for a fast algorithm
1070: *
1071: IU = 1
1072: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1073: *
1074: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1075: *
1076: LDWRKU = LDA
1077: IR = IU + LDWRKU*N
1078: LDWRKR = LDA
1079: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1080: *
1081: * WORK(IU) is LDA by N and WORK(IR) is N by N
1082: *
1083: LDWRKU = LDA
1084: IR = IU + LDWRKU*N
1085: LDWRKR = N
1086: ELSE
1087: *
1088: * WORK(IU) is N by N and WORK(IR) is N by N
1089: *
1090: LDWRKU = N
1091: IR = IU + LDWRKU*N
1092: LDWRKR = N
1093: END IF
1094: ITAU = IR + LDWRKR*N
1095: IWORK = ITAU + N
1096: *
1097: * Compute A=Q*R
1098: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1099: *
1100: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1101: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1102: *
1103: * Copy R to WORK(IU), zeroing out below it
1104: *
1105: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1106: $ LDWRKU )
1107: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1108: $ WORK( IU+1 ), LDWRKU )
1109: *
1110: * Generate Q in A
1111: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1112: *
1113: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1114: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1115: IE = ITAU
1116: ITAUQ = IE + N
1117: ITAUP = ITAUQ + N
1118: IWORK = ITAUP + N
1119: *
1120: * Bidiagonalize R in WORK(IU), copying result to
1121: * WORK(IR)
1122: * (Workspace: need 2*N*N+4*N,
1123: * prefer 2*N*N+3*N+2*N*NB)
1124: *
1125: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1126: $ WORK( IE ), WORK( ITAUQ ),
1127: $ WORK( ITAUP ), WORK( IWORK ),
1128: $ LWORK-IWORK+1, IERR )
1129: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1130: $ WORK( IR ), LDWRKR )
1131: *
1132: * Generate left bidiagonalizing vectors in WORK(IU)
1133: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1134: *
1135: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1136: $ WORK( ITAUQ ), WORK( IWORK ),
1137: $ LWORK-IWORK+1, IERR )
1138: *
1139: * Generate right bidiagonalizing vectors in WORK(IR)
1140: * (Workspace: need 2*N*N+4*N-1,
1141: * prefer 2*N*N+3*N+(N-1)*NB)
1142: *
1143: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1144: $ WORK( ITAUP ), WORK( IWORK ),
1145: $ LWORK-IWORK+1, IERR )
1146: IWORK = IE + N
1147: *
1148: * Perform bidiagonal QR iteration, computing left
1149: * singular vectors of R in WORK(IU) and computing
1150: * right singular vectors of R in WORK(IR)
1151: * (Workspace: need 2*N*N+BDSPAC)
1152: *
1153: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1154: $ WORK( IR ), LDWRKR, WORK( IU ),
1155: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1156: *
1157: * Multiply Q in A by left singular vectors of R in
1158: * WORK(IU), storing result in U
1159: * (Workspace: need N*N)
1160: *
1161: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1162: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1163: *
1164: * Copy right singular vectors of R to A
1165: * (Workspace: need N*N)
1166: *
1167: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1168: $ LDA )
1169: *
1170: ELSE
1171: *
1172: * Insufficient workspace for a fast algorithm
1173: *
1174: ITAU = 1
1175: IWORK = ITAU + N
1176: *
1177: * Compute A=Q*R, copying result to U
1178: * (Workspace: need 2*N, prefer N+N*NB)
1179: *
1180: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1181: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1182: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1183: *
1184: * Generate Q in U
1185: * (Workspace: need 2*N, prefer N+N*NB)
1186: *
1187: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1188: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1189: IE = ITAU
1190: ITAUQ = IE + N
1191: ITAUP = ITAUQ + N
1192: IWORK = ITAUP + N
1193: *
1194: * Zero out below R in A
1195: *
1196: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1197: $ LDA )
1198: *
1199: * Bidiagonalize R in A
1200: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1201: *
1202: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1203: $ WORK( ITAUQ ), WORK( ITAUP ),
1204: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1205: *
1206: * Multiply Q in U by left vectors bidiagonalizing R
1207: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1208: *
1209: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1210: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1211: $ LWORK-IWORK+1, IERR )
1212: *
1213: * Generate right vectors bidiagonalizing R in A
1214: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1215: *
1216: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1217: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1218: IWORK = IE + N
1219: *
1220: * Perform bidiagonal QR iteration, computing left
1221: * singular vectors of A in U and computing right
1222: * singular vectors of A in A
1223: * (Workspace: need BDSPAC)
1224: *
1225: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1226: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1227: $ INFO )
1228: *
1229: END IF
1230: *
1231: ELSE IF( WNTVAS ) THEN
1232: *
1233: * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1234: * or 'A')
1235: * N left singular vectors to be computed in U and
1236: * N right singular vectors to be computed in VT
1237: *
1238: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1239: *
1240: * Sufficient workspace for a fast algorithm
1241: *
1242: IU = 1
1243: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1244: *
1245: * WORK(IU) is LDA by N
1246: *
1247: LDWRKU = LDA
1248: ELSE
1249: *
1250: * WORK(IU) is N by N
1251: *
1252: LDWRKU = N
1253: END IF
1254: ITAU = IU + LDWRKU*N
1255: IWORK = ITAU + N
1256: *
1257: * Compute A=Q*R
1258: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1259: *
1260: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1261: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1262: *
1263: * Copy R to WORK(IU), zeroing out below it
1264: *
1265: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1266: $ LDWRKU )
1267: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1268: $ WORK( IU+1 ), LDWRKU )
1269: *
1270: * Generate Q in A
1271: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1272: *
1273: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1274: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1275: IE = ITAU
1276: ITAUQ = IE + N
1277: ITAUP = ITAUQ + N
1278: IWORK = ITAUP + N
1279: *
1280: * Bidiagonalize R in WORK(IU), copying result to VT
1281: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1282: *
1283: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1284: $ WORK( IE ), WORK( ITAUQ ),
1285: $ WORK( ITAUP ), WORK( IWORK ),
1286: $ LWORK-IWORK+1, IERR )
1287: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1288: $ LDVT )
1289: *
1290: * Generate left bidiagonalizing vectors in WORK(IU)
1291: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1292: *
1293: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1294: $ WORK( ITAUQ ), WORK( IWORK ),
1295: $ LWORK-IWORK+1, IERR )
1296: *
1297: * Generate right bidiagonalizing vectors in VT
1298: * (Workspace: need N*N+4*N-1,
1299: * prefer N*N+3*N+(N-1)*NB)
1300: *
1301: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1302: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1303: IWORK = IE + N
1304: *
1305: * Perform bidiagonal QR iteration, computing left
1306: * singular vectors of R in WORK(IU) and computing
1307: * right singular vectors of R in VT
1308: * (Workspace: need N*N+BDSPAC)
1309: *
1310: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1311: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1312: $ WORK( IWORK ), INFO )
1313: *
1314: * Multiply Q in A by left singular vectors of R in
1315: * WORK(IU), storing result in U
1316: * (Workspace: need N*N)
1317: *
1318: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1319: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1320: *
1321: ELSE
1322: *
1323: * Insufficient workspace for a fast algorithm
1324: *
1325: ITAU = 1
1326: IWORK = ITAU + N
1327: *
1328: * Compute A=Q*R, copying result to U
1329: * (Workspace: need 2*N, prefer N+N*NB)
1330: *
1331: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1332: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1333: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1334: *
1335: * Generate Q in U
1336: * (Workspace: need 2*N, prefer N+N*NB)
1337: *
1338: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1339: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1340: *
1341: * Copy R to VT, zeroing out below it
1342: *
1343: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1344: IF( N.GT.1 )
1345: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1346: $ VT( 2, 1 ), LDVT )
1347: IE = ITAU
1348: ITAUQ = IE + N
1349: ITAUP = ITAUQ + N
1350: IWORK = ITAUP + N
1351: *
1352: * Bidiagonalize R in VT
1353: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1354: *
1355: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1356: $ WORK( ITAUQ ), WORK( ITAUP ),
1357: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1358: *
1359: * Multiply Q in U by left bidiagonalizing vectors
1360: * in VT
1361: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1362: *
1363: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1364: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1365: $ LWORK-IWORK+1, IERR )
1366: *
1367: * Generate right bidiagonalizing vectors in VT
1368: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1369: *
1370: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1371: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1372: IWORK = IE + N
1373: *
1374: * Perform bidiagonal QR iteration, computing left
1375: * singular vectors of A in U and computing right
1376: * singular vectors of A in VT
1377: * (Workspace: need BDSPAC)
1378: *
1379: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1380: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1381: $ INFO )
1382: *
1383: END IF
1384: *
1385: END IF
1386: *
1387: ELSE IF( WNTUA ) THEN
1388: *
1389: IF( WNTVN ) THEN
1390: *
1391: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1392: * M left singular vectors to be computed in U and
1393: * no right singular vectors to be computed
1394: *
1395: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1396: *
1397: * Sufficient workspace for a fast algorithm
1398: *
1399: IR = 1
1400: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1401: *
1402: * WORK(IR) is LDA by N
1403: *
1404: LDWRKR = LDA
1405: ELSE
1406: *
1407: * WORK(IR) is N by N
1408: *
1409: LDWRKR = N
1410: END IF
1411: ITAU = IR + LDWRKR*N
1412: IWORK = ITAU + N
1413: *
1414: * Compute A=Q*R, copying result to U
1415: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1416: *
1417: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1418: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1419: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1420: *
1421: * Copy R to WORK(IR), zeroing out below it
1422: *
1423: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1424: $ LDWRKR )
1425: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1426: $ WORK( IR+1 ), LDWRKR )
1427: *
1428: * Generate Q in U
1429: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1430: *
1431: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1432: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1433: IE = ITAU
1434: ITAUQ = IE + N
1435: ITAUP = ITAUQ + N
1436: IWORK = ITAUP + N
1437: *
1438: * Bidiagonalize R in WORK(IR)
1439: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1440: *
1441: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1442: $ WORK( IE ), WORK( ITAUQ ),
1443: $ WORK( ITAUP ), WORK( IWORK ),
1444: $ LWORK-IWORK+1, IERR )
1445: *
1446: * Generate left bidiagonalizing vectors in WORK(IR)
1447: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1448: *
1449: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1450: $ WORK( ITAUQ ), WORK( IWORK ),
1451: $ LWORK-IWORK+1, IERR )
1452: IWORK = IE + N
1453: *
1454: * Perform bidiagonal QR iteration, computing left
1455: * singular vectors of R in WORK(IR)
1456: * (Workspace: need N*N+BDSPAC)
1457: *
1458: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1459: $ 1, WORK( IR ), LDWRKR, DUM, 1,
1460: $ WORK( IWORK ), INFO )
1461: *
1462: * Multiply Q in U by left singular vectors of R in
1463: * WORK(IR), storing result in A
1464: * (Workspace: need N*N)
1465: *
1466: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1467: $ WORK( IR ), LDWRKR, ZERO, A, LDA )
1468: *
1469: * Copy left singular vectors of A from A to U
1470: *
1471: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1472: *
1473: ELSE
1474: *
1475: * Insufficient workspace for a fast algorithm
1476: *
1477: ITAU = 1
1478: IWORK = ITAU + N
1479: *
1480: * Compute A=Q*R, copying result to U
1481: * (Workspace: need 2*N, prefer N+N*NB)
1482: *
1483: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1484: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1485: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1486: *
1487: * Generate Q in U
1488: * (Workspace: need N+M, prefer N+M*NB)
1489: *
1490: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1491: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1492: IE = ITAU
1493: ITAUQ = IE + N
1494: ITAUP = ITAUQ + N
1495: IWORK = ITAUP + N
1496: *
1497: * Zero out below R in A
1498: *
1499: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1500: $ LDA )
1501: *
1502: * Bidiagonalize R in A
1503: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1504: *
1505: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1506: $ WORK( ITAUQ ), WORK( ITAUP ),
1507: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1508: *
1509: * Multiply Q in U by left bidiagonalizing vectors
1510: * in A
1511: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1512: *
1513: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1514: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1515: $ LWORK-IWORK+1, IERR )
1516: IWORK = IE + N
1517: *
1518: * Perform bidiagonal QR iteration, computing left
1519: * singular vectors of A in U
1520: * (Workspace: need BDSPAC)
1521: *
1522: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1523: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1524: $ INFO )
1525: *
1526: END IF
1527: *
1528: ELSE IF( WNTVO ) THEN
1529: *
1530: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1531: * M left singular vectors to be computed in U and
1532: * N right singular vectors to be overwritten on A
1533: *
1534: IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1535: *
1536: * Sufficient workspace for a fast algorithm
1537: *
1538: IU = 1
1539: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1540: *
1541: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1542: *
1543: LDWRKU = LDA
1544: IR = IU + LDWRKU*N
1545: LDWRKR = LDA
1546: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1547: *
1548: * WORK(IU) is LDA by N and WORK(IR) is N by N
1549: *
1550: LDWRKU = LDA
1551: IR = IU + LDWRKU*N
1552: LDWRKR = N
1553: ELSE
1554: *
1555: * WORK(IU) is N by N and WORK(IR) is N by N
1556: *
1557: LDWRKU = N
1558: IR = IU + LDWRKU*N
1559: LDWRKR = N
1560: END IF
1561: ITAU = IR + LDWRKR*N
1562: IWORK = ITAU + N
1563: *
1564: * Compute A=Q*R, copying result to U
1565: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1566: *
1567: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1568: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1569: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1570: *
1571: * Generate Q in U
1572: * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1573: *
1574: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1575: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1576: *
1577: * Copy R to WORK(IU), zeroing out below it
1578: *
1579: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1580: $ LDWRKU )
1581: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1582: $ WORK( IU+1 ), LDWRKU )
1583: IE = ITAU
1584: ITAUQ = IE + N
1585: ITAUP = ITAUQ + N
1586: IWORK = ITAUP + N
1587: *
1588: * Bidiagonalize R in WORK(IU), copying result to
1589: * WORK(IR)
1590: * (Workspace: need 2*N*N+4*N,
1591: * prefer 2*N*N+3*N+2*N*NB)
1592: *
1593: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1594: $ WORK( IE ), WORK( ITAUQ ),
1595: $ WORK( ITAUP ), WORK( IWORK ),
1596: $ LWORK-IWORK+1, IERR )
1597: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1598: $ WORK( IR ), LDWRKR )
1599: *
1600: * Generate left bidiagonalizing vectors in WORK(IU)
1601: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1602: *
1603: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1604: $ WORK( ITAUQ ), WORK( IWORK ),
1605: $ LWORK-IWORK+1, IERR )
1606: *
1607: * Generate right bidiagonalizing vectors in WORK(IR)
1608: * (Workspace: need 2*N*N+4*N-1,
1609: * prefer 2*N*N+3*N+(N-1)*NB)
1610: *
1611: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1612: $ WORK( ITAUP ), WORK( IWORK ),
1613: $ LWORK-IWORK+1, IERR )
1614: IWORK = IE + N
1615: *
1616: * Perform bidiagonal QR iteration, computing left
1617: * singular vectors of R in WORK(IU) and computing
1618: * right singular vectors of R in WORK(IR)
1619: * (Workspace: need 2*N*N+BDSPAC)
1620: *
1621: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1622: $ WORK( IR ), LDWRKR, WORK( IU ),
1623: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1624: *
1625: * Multiply Q in U by left singular vectors of R in
1626: * WORK(IU), storing result in A
1627: * (Workspace: need N*N)
1628: *
1629: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1630: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1631: *
1632: * Copy left singular vectors of A from A to U
1633: *
1634: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1635: *
1636: * Copy right singular vectors of R from WORK(IR) to A
1637: *
1638: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1639: $ LDA )
1640: *
1641: ELSE
1642: *
1643: * Insufficient workspace for a fast algorithm
1644: *
1645: ITAU = 1
1646: IWORK = ITAU + N
1647: *
1648: * Compute A=Q*R, copying result to U
1649: * (Workspace: need 2*N, prefer N+N*NB)
1650: *
1651: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1652: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1653: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1654: *
1655: * Generate Q in U
1656: * (Workspace: need N+M, prefer N+M*NB)
1657: *
1658: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1659: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1660: IE = ITAU
1661: ITAUQ = IE + N
1662: ITAUP = ITAUQ + N
1663: IWORK = ITAUP + N
1664: *
1665: * Zero out below R in A
1666: *
1667: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1668: $ LDA )
1669: *
1670: * Bidiagonalize R in A
1671: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1672: *
1673: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1674: $ WORK( ITAUQ ), WORK( ITAUP ),
1675: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1676: *
1677: * Multiply Q in U by left bidiagonalizing vectors
1678: * in A
1679: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1680: *
1681: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1682: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1683: $ LWORK-IWORK+1, IERR )
1684: *
1685: * Generate right bidiagonalizing vectors in A
1686: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1687: *
1688: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1689: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1690: IWORK = IE + N
1691: *
1692: * Perform bidiagonal QR iteration, computing left
1693: * singular vectors of A in U and computing right
1694: * singular vectors of A in A
1695: * (Workspace: need BDSPAC)
1696: *
1697: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1698: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1699: $ INFO )
1700: *
1701: END IF
1702: *
1703: ELSE IF( WNTVAS ) THEN
1704: *
1705: * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1706: * or 'A')
1707: * M left singular vectors to be computed in U and
1708: * N right singular vectors to be computed in VT
1709: *
1710: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1711: *
1712: * Sufficient workspace for a fast algorithm
1713: *
1714: IU = 1
1715: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1716: *
1717: * WORK(IU) is LDA by N
1718: *
1719: LDWRKU = LDA
1720: ELSE
1721: *
1722: * WORK(IU) is N by N
1723: *
1724: LDWRKU = N
1725: END IF
1726: ITAU = IU + LDWRKU*N
1727: IWORK = ITAU + N
1728: *
1729: * Compute A=Q*R, copying result to U
1730: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1731: *
1732: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1733: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1734: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1735: *
1736: * Generate Q in U
1737: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1738: *
1739: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1740: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1741: *
1742: * Copy R to WORK(IU), zeroing out below it
1743: *
1744: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1745: $ LDWRKU )
1746: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1747: $ WORK( IU+1 ), LDWRKU )
1748: IE = ITAU
1749: ITAUQ = IE + N
1750: ITAUP = ITAUQ + N
1751: IWORK = ITAUP + N
1752: *
1753: * Bidiagonalize R in WORK(IU), copying result to VT
1754: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1755: *
1756: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1757: $ WORK( IE ), WORK( ITAUQ ),
1758: $ WORK( ITAUP ), WORK( IWORK ),
1759: $ LWORK-IWORK+1, IERR )
1760: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1761: $ LDVT )
1762: *
1763: * Generate left bidiagonalizing vectors in WORK(IU)
1764: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1765: *
1766: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1767: $ WORK( ITAUQ ), WORK( IWORK ),
1768: $ LWORK-IWORK+1, IERR )
1769: *
1770: * Generate right bidiagonalizing vectors in VT
1771: * (Workspace: need N*N+4*N-1,
1772: * prefer N*N+3*N+(N-1)*NB)
1773: *
1774: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1775: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1776: IWORK = IE + N
1777: *
1778: * Perform bidiagonal QR iteration, computing left
1779: * singular vectors of R in WORK(IU) and computing
1780: * right singular vectors of R in VT
1781: * (Workspace: need N*N+BDSPAC)
1782: *
1783: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1784: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1785: $ WORK( IWORK ), INFO )
1786: *
1787: * Multiply Q in U by left singular vectors of R in
1788: * WORK(IU), storing result in A
1789: * (Workspace: need N*N)
1790: *
1791: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1792: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1793: *
1794: * Copy left singular vectors of A from A to U
1795: *
1796: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1797: *
1798: ELSE
1799: *
1800: * Insufficient workspace for a fast algorithm
1801: *
1802: ITAU = 1
1803: IWORK = ITAU + N
1804: *
1805: * Compute A=Q*R, copying result to U
1806: * (Workspace: need 2*N, prefer N+N*NB)
1807: *
1808: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1809: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1810: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1811: *
1812: * Generate Q in U
1813: * (Workspace: need N+M, prefer N+M*NB)
1814: *
1815: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1816: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1817: *
1818: * Copy R from A to VT, zeroing out below it
1819: *
1820: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1821: IF( N.GT.1 )
1822: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1823: $ VT( 2, 1 ), LDVT )
1824: IE = ITAU
1825: ITAUQ = IE + N
1826: ITAUP = ITAUQ + N
1827: IWORK = ITAUP + N
1828: *
1829: * Bidiagonalize R in VT
1830: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1831: *
1832: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1833: $ WORK( ITAUQ ), WORK( ITAUP ),
1834: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1835: *
1836: * Multiply Q in U by left bidiagonalizing vectors
1837: * in VT
1838: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1839: *
1840: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1841: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1842: $ LWORK-IWORK+1, IERR )
1843: *
1844: * Generate right bidiagonalizing vectors in VT
1845: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1846: *
1847: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1848: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1849: IWORK = IE + N
1850: *
1851: * Perform bidiagonal QR iteration, computing left
1852: * singular vectors of A in U and computing right
1853: * singular vectors of A in VT
1854: * (Workspace: need BDSPAC)
1855: *
1856: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1857: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1858: $ INFO )
1859: *
1860: END IF
1861: *
1862: END IF
1863: *
1864: END IF
1865: *
1866: ELSE
1867: *
1868: * M .LT. MNTHR
1869: *
1870: * Path 10 (M at least N, but not much larger)
1871: * Reduce to bidiagonal form without QR decomposition
1872: *
1873: IE = 1
1874: ITAUQ = IE + N
1875: ITAUP = ITAUQ + N
1876: IWORK = ITAUP + N
1877: *
1878: * Bidiagonalize A
1879: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1880: *
1881: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1882: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1883: $ IERR )
1884: IF( WNTUAS ) THEN
1885: *
1886: * If left singular vectors desired in U, copy result to U
1887: * and generate left bidiagonalizing vectors in U
1888: * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1889: *
1890: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1891: IF( WNTUS )
1892: $ NCU = N
1893: IF( WNTUA )
1894: $ NCU = M
1895: CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1896: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1897: END IF
1898: IF( WNTVAS ) THEN
1899: *
1900: * If right singular vectors desired in VT, copy result to
1901: * VT and generate right bidiagonalizing vectors in VT
1902: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1903: *
1904: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1905: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1906: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1907: END IF
1908: IF( WNTUO ) THEN
1909: *
1910: * If left singular vectors desired in A, generate left
1911: * bidiagonalizing vectors in A
1912: * (Workspace: need 4*N, prefer 3*N+N*NB)
1913: *
1914: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1915: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1916: END IF
1917: IF( WNTVO ) THEN
1918: *
1919: * If right singular vectors desired in A, generate right
1920: * bidiagonalizing vectors in A
1921: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1922: *
1923: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1924: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1925: END IF
1926: IWORK = IE + N
1927: IF( WNTUAS .OR. WNTUO )
1928: $ NRU = M
1929: IF( WNTUN )
1930: $ NRU = 0
1931: IF( WNTVAS .OR. WNTVO )
1932: $ NCVT = N
1933: IF( WNTVN )
1934: $ NCVT = 0
1935: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1936: *
1937: * Perform bidiagonal QR iteration, if desired, computing
1938: * left singular vectors in U and computing right singular
1939: * vectors in VT
1940: * (Workspace: need BDSPAC)
1941: *
1942: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1943: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1944: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1945: *
1946: * Perform bidiagonal QR iteration, if desired, computing
1947: * left singular vectors in U and computing right singular
1948: * vectors in A
1949: * (Workspace: need BDSPAC)
1950: *
1951: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1952: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
1953: ELSE
1954: *
1955: * Perform bidiagonal QR iteration, if desired, computing
1956: * left singular vectors in A and computing right singular
1957: * vectors in VT
1958: * (Workspace: need BDSPAC)
1959: *
1960: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1961: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1962: END IF
1963: *
1964: END IF
1965: *
1966: ELSE
1967: *
1968: * A has more columns than rows. If A has sufficiently more
1969: * columns than rows, first reduce using the LQ decomposition (if
1970: * sufficient workspace available)
1971: *
1972: IF( N.GE.MNTHR ) THEN
1973: *
1974: IF( WNTVN ) THEN
1975: *
1976: * Path 1t(N much larger than M, JOBVT='N')
1977: * No right singular vectors to be computed
1978: *
1979: ITAU = 1
1980: IWORK = ITAU + M
1981: *
1982: * Compute A=L*Q
1983: * (Workspace: need 2*M, prefer M+M*NB)
1984: *
1985: CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1986: $ LWORK-IWORK+1, IERR )
1987: *
1988: * Zero out above L
1989: *
1990: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1991: IE = 1
1992: ITAUQ = IE + M
1993: ITAUP = ITAUQ + M
1994: IWORK = ITAUP + M
1995: *
1996: * Bidiagonalize L in A
1997: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
1998: *
1999: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2000: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2001: $ IERR )
2002: IF( WNTUO .OR. WNTUAS ) THEN
2003: *
2004: * If left singular vectors desired, generate Q
2005: * (Workspace: need 4*M, prefer 3*M+M*NB)
2006: *
2007: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2008: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2009: END IF
2010: IWORK = IE + M
2011: NRU = 0
2012: IF( WNTUO .OR. WNTUAS )
2013: $ NRU = M
2014: *
2015: * Perform bidiagonal QR iteration, computing left singular
2016: * vectors of A in A if desired
2017: * (Workspace: need BDSPAC)
2018: *
2019: CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2020: $ LDA, DUM, 1, WORK( IWORK ), INFO )
2021: *
2022: * If left singular vectors desired in U, copy them there
2023: *
2024: IF( WNTUAS )
2025: $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2026: *
2027: ELSE IF( WNTVO .AND. WNTUN ) THEN
2028: *
2029: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2030: * M right singular vectors to be overwritten on A and
2031: * no left singular vectors to be computed
2032: *
2033: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2034: *
2035: * Sufficient workspace for a fast algorithm
2036: *
2037: IR = 1
2038: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2039: *
2040: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2041: *
2042: LDWRKU = LDA
2043: CHUNK = N
2044: LDWRKR = LDA
2045: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2046: *
2047: * WORK(IU) is LDA by N and WORK(IR) is M by M
2048: *
2049: LDWRKU = LDA
2050: CHUNK = N
2051: LDWRKR = M
2052: ELSE
2053: *
2054: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2055: *
2056: LDWRKU = M
2057: CHUNK = ( LWORK-M*M-M ) / M
2058: LDWRKR = M
2059: END IF
2060: ITAU = IR + LDWRKR*M
2061: IWORK = ITAU + M
2062: *
2063: * Compute A=L*Q
2064: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2065: *
2066: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2067: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2068: *
2069: * Copy L to WORK(IR) and zero out above it
2070: *
2071: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2072: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2073: $ WORK( IR+LDWRKR ), LDWRKR )
2074: *
2075: * Generate Q in A
2076: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2077: *
2078: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2079: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2080: IE = ITAU
2081: ITAUQ = IE + M
2082: ITAUP = ITAUQ + M
2083: IWORK = ITAUP + M
2084: *
2085: * Bidiagonalize L in WORK(IR)
2086: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2087: *
2088: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2089: $ WORK( ITAUQ ), WORK( ITAUP ),
2090: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2091: *
2092: * Generate right vectors bidiagonalizing L
2093: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2094: *
2095: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2096: $ WORK( ITAUP ), WORK( IWORK ),
2097: $ LWORK-IWORK+1, IERR )
2098: IWORK = IE + M
2099: *
2100: * Perform bidiagonal QR iteration, computing right
2101: * singular vectors of L in WORK(IR)
2102: * (Workspace: need M*M+BDSPAC)
2103: *
2104: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2105: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2106: $ WORK( IWORK ), INFO )
2107: IU = IE + M
2108: *
2109: * Multiply right singular vectors of L in WORK(IR) by Q
2110: * in A, storing result in WORK(IU) and copying to A
2111: * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2112: *
2113: DO 30 I = 1, N, CHUNK
2114: BLK = MIN( N-I+1, CHUNK )
2115: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2116: $ LDWRKR, A( 1, I ), LDA, ZERO,
2117: $ WORK( IU ), LDWRKU )
2118: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2119: $ A( 1, I ), LDA )
2120: 30 CONTINUE
2121: *
2122: ELSE
2123: *
2124: * Insufficient workspace for a fast algorithm
2125: *
2126: IE = 1
2127: ITAUQ = IE + M
2128: ITAUP = ITAUQ + M
2129: IWORK = ITAUP + M
2130: *
2131: * Bidiagonalize A
2132: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2133: *
2134: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2135: $ WORK( ITAUQ ), WORK( ITAUP ),
2136: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2137: *
2138: * Generate right vectors bidiagonalizing A
2139: * (Workspace: need 4*M, prefer 3*M+M*NB)
2140: *
2141: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2142: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2143: IWORK = IE + M
2144: *
2145: * Perform bidiagonal QR iteration, computing right
2146: * singular vectors of A in A
2147: * (Workspace: need BDSPAC)
2148: *
2149: CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2150: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2151: *
2152: END IF
2153: *
2154: ELSE IF( WNTVO .AND. WNTUAS ) THEN
2155: *
2156: * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2157: * M right singular vectors to be overwritten on A and
2158: * M left singular vectors to be computed in U
2159: *
2160: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2161: *
2162: * Sufficient workspace for a fast algorithm
2163: *
2164: IR = 1
2165: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2166: *
2167: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2168: *
2169: LDWRKU = LDA
2170: CHUNK = N
2171: LDWRKR = LDA
2172: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2173: *
2174: * WORK(IU) is LDA by N and WORK(IR) is M by M
2175: *
2176: LDWRKU = LDA
2177: CHUNK = N
2178: LDWRKR = M
2179: ELSE
2180: *
2181: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2182: *
2183: LDWRKU = M
2184: CHUNK = ( LWORK-M*M-M ) / M
2185: LDWRKR = M
2186: END IF
2187: ITAU = IR + LDWRKR*M
2188: IWORK = ITAU + M
2189: *
2190: * Compute A=L*Q
2191: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2192: *
2193: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2194: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2195: *
2196: * Copy L to U, zeroing about above it
2197: *
2198: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2199: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2200: $ LDU )
2201: *
2202: * Generate Q in A
2203: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2204: *
2205: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2206: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2207: IE = ITAU
2208: ITAUQ = IE + M
2209: ITAUP = ITAUQ + M
2210: IWORK = ITAUP + M
2211: *
2212: * Bidiagonalize L in U, copying result to WORK(IR)
2213: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2214: *
2215: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2216: $ WORK( ITAUQ ), WORK( ITAUP ),
2217: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2218: CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2219: *
2220: * Generate right vectors bidiagonalizing L in WORK(IR)
2221: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2222: *
2223: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2224: $ WORK( ITAUP ), WORK( IWORK ),
2225: $ LWORK-IWORK+1, IERR )
2226: *
2227: * Generate left vectors bidiagonalizing L in U
2228: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2229: *
2230: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2231: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2232: IWORK = IE + M
2233: *
2234: * Perform bidiagonal QR iteration, computing left
2235: * singular vectors of L in U, and computing right
2236: * singular vectors of L in WORK(IR)
2237: * (Workspace: need M*M+BDSPAC)
2238: *
2239: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2240: $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2241: $ WORK( IWORK ), INFO )
2242: IU = IE + M
2243: *
2244: * Multiply right singular vectors of L in WORK(IR) by Q
2245: * in A, storing result in WORK(IU) and copying to A
2246: * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2247: *
2248: DO 40 I = 1, N, CHUNK
2249: BLK = MIN( N-I+1, CHUNK )
2250: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2251: $ LDWRKR, A( 1, I ), LDA, ZERO,
2252: $ WORK( IU ), LDWRKU )
2253: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2254: $ A( 1, I ), LDA )
2255: 40 CONTINUE
2256: *
2257: ELSE
2258: *
2259: * Insufficient workspace for a fast algorithm
2260: *
2261: ITAU = 1
2262: IWORK = ITAU + M
2263: *
2264: * Compute A=L*Q
2265: * (Workspace: need 2*M, prefer M+M*NB)
2266: *
2267: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2268: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2269: *
2270: * Copy L to U, zeroing out above it
2271: *
2272: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2273: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2274: $ LDU )
2275: *
2276: * Generate Q in A
2277: * (Workspace: need 2*M, prefer M+M*NB)
2278: *
2279: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2280: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2281: IE = ITAU
2282: ITAUQ = IE + M
2283: ITAUP = ITAUQ + M
2284: IWORK = ITAUP + M
2285: *
2286: * Bidiagonalize L in U
2287: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2288: *
2289: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2290: $ WORK( ITAUQ ), WORK( ITAUP ),
2291: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2292: *
2293: * Multiply right vectors bidiagonalizing L by Q in A
2294: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2295: *
2296: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2297: $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2298: $ LWORK-IWORK+1, IERR )
2299: *
2300: * Generate left vectors bidiagonalizing L in U
2301: * (Workspace: need 4*M, prefer 3*M+M*NB)
2302: *
2303: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2304: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2305: IWORK = IE + M
2306: *
2307: * Perform bidiagonal QR iteration, computing left
2308: * singular vectors of A in U and computing right
2309: * singular vectors of A in A
2310: * (Workspace: need BDSPAC)
2311: *
2312: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2313: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2314: *
2315: END IF
2316: *
2317: ELSE IF( WNTVS ) THEN
2318: *
2319: IF( WNTUN ) THEN
2320: *
2321: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2322: * M right singular vectors to be computed in VT and
2323: * no left singular vectors to be computed
2324: *
2325: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2326: *
2327: * Sufficient workspace for a fast algorithm
2328: *
2329: IR = 1
2330: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2331: *
2332: * WORK(IR) is LDA by M
2333: *
2334: LDWRKR = LDA
2335: ELSE
2336: *
2337: * WORK(IR) is M by M
2338: *
2339: LDWRKR = M
2340: END IF
2341: ITAU = IR + LDWRKR*M
2342: IWORK = ITAU + M
2343: *
2344: * Compute A=L*Q
2345: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2346: *
2347: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2348: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2349: *
2350: * Copy L to WORK(IR), zeroing out above it
2351: *
2352: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2353: $ LDWRKR )
2354: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2355: $ WORK( IR+LDWRKR ), LDWRKR )
2356: *
2357: * Generate Q in A
2358: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2359: *
2360: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2361: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2362: IE = ITAU
2363: ITAUQ = IE + M
2364: ITAUP = ITAUQ + M
2365: IWORK = ITAUP + M
2366: *
2367: * Bidiagonalize L in WORK(IR)
2368: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2369: *
2370: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2371: $ WORK( IE ), WORK( ITAUQ ),
2372: $ WORK( ITAUP ), WORK( IWORK ),
2373: $ LWORK-IWORK+1, IERR )
2374: *
2375: * Generate right vectors bidiagonalizing L in
2376: * WORK(IR)
2377: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2378: *
2379: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2380: $ WORK( ITAUP ), WORK( IWORK ),
2381: $ LWORK-IWORK+1, IERR )
2382: IWORK = IE + M
2383: *
2384: * Perform bidiagonal QR iteration, computing right
2385: * singular vectors of L in WORK(IR)
2386: * (Workspace: need M*M+BDSPAC)
2387: *
2388: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2389: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2390: $ WORK( IWORK ), INFO )
2391: *
2392: * Multiply right singular vectors of L in WORK(IR) by
2393: * Q in A, storing result in VT
2394: * (Workspace: need M*M)
2395: *
2396: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2397: $ LDWRKR, A, LDA, ZERO, VT, LDVT )
2398: *
2399: ELSE
2400: *
2401: * Insufficient workspace for a fast algorithm
2402: *
2403: ITAU = 1
2404: IWORK = ITAU + M
2405: *
2406: * Compute A=L*Q
2407: * (Workspace: need 2*M, prefer M+M*NB)
2408: *
2409: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2410: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2411: *
2412: * Copy result to VT
2413: *
2414: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2415: *
2416: * Generate Q in VT
2417: * (Workspace: need 2*M, prefer M+M*NB)
2418: *
2419: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2420: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2421: IE = ITAU
2422: ITAUQ = IE + M
2423: ITAUP = ITAUQ + M
2424: IWORK = ITAUP + M
2425: *
2426: * Zero out above L in A
2427: *
2428: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2429: $ LDA )
2430: *
2431: * Bidiagonalize L in A
2432: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2433: *
2434: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2435: $ WORK( ITAUQ ), WORK( ITAUP ),
2436: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2437: *
2438: * Multiply right vectors bidiagonalizing L by Q in VT
2439: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2440: *
2441: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2442: $ WORK( ITAUP ), VT, LDVT,
2443: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2444: IWORK = IE + M
2445: *
2446: * Perform bidiagonal QR iteration, computing right
2447: * singular vectors of A in VT
2448: * (Workspace: need BDSPAC)
2449: *
2450: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2451: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2452: $ INFO )
2453: *
2454: END IF
2455: *
2456: ELSE IF( WNTUO ) THEN
2457: *
2458: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2459: * M right singular vectors to be computed in VT and
2460: * M left singular vectors to be overwritten on A
2461: *
2462: IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2463: *
2464: * Sufficient workspace for a fast algorithm
2465: *
2466: IU = 1
2467: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2468: *
2469: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2470: *
2471: LDWRKU = LDA
2472: IR = IU + LDWRKU*M
2473: LDWRKR = LDA
2474: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2475: *
2476: * WORK(IU) is LDA by M and WORK(IR) is M by M
2477: *
2478: LDWRKU = LDA
2479: IR = IU + LDWRKU*M
2480: LDWRKR = M
2481: ELSE
2482: *
2483: * WORK(IU) is M by M and WORK(IR) is M by M
2484: *
2485: LDWRKU = M
2486: IR = IU + LDWRKU*M
2487: LDWRKR = M
2488: END IF
2489: ITAU = IR + LDWRKR*M
2490: IWORK = ITAU + M
2491: *
2492: * Compute A=L*Q
2493: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2494: *
2495: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2496: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2497: *
2498: * Copy L to WORK(IU), zeroing out below it
2499: *
2500: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2501: $ LDWRKU )
2502: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2503: $ WORK( IU+LDWRKU ), LDWRKU )
2504: *
2505: * Generate Q in A
2506: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2507: *
2508: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2509: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2510: IE = ITAU
2511: ITAUQ = IE + M
2512: ITAUP = ITAUQ + M
2513: IWORK = ITAUP + M
2514: *
2515: * Bidiagonalize L in WORK(IU), copying result to
2516: * WORK(IR)
2517: * (Workspace: need 2*M*M+4*M,
2518: * prefer 2*M*M+3*M+2*M*NB)
2519: *
2520: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2521: $ WORK( IE ), WORK( ITAUQ ),
2522: $ WORK( ITAUP ), WORK( IWORK ),
2523: $ LWORK-IWORK+1, IERR )
2524: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2525: $ WORK( IR ), LDWRKR )
2526: *
2527: * Generate right bidiagonalizing vectors in WORK(IU)
2528: * (Workspace: need 2*M*M+4*M-1,
2529: * prefer 2*M*M+3*M+(M-1)*NB)
2530: *
2531: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2532: $ WORK( ITAUP ), WORK( IWORK ),
2533: $ LWORK-IWORK+1, IERR )
2534: *
2535: * Generate left bidiagonalizing vectors in WORK(IR)
2536: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2537: *
2538: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2539: $ WORK( ITAUQ ), WORK( IWORK ),
2540: $ LWORK-IWORK+1, IERR )
2541: IWORK = IE + M
2542: *
2543: * Perform bidiagonal QR iteration, computing left
2544: * singular vectors of L in WORK(IR) and computing
2545: * right singular vectors of L in WORK(IU)
2546: * (Workspace: need 2*M*M+BDSPAC)
2547: *
2548: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2549: $ WORK( IU ), LDWRKU, WORK( IR ),
2550: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2551: *
2552: * Multiply right singular vectors of L in WORK(IU) by
2553: * Q in A, storing result in VT
2554: * (Workspace: need M*M)
2555: *
2556: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2557: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2558: *
2559: * Copy left singular vectors of L to A
2560: * (Workspace: need M*M)
2561: *
2562: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2563: $ LDA )
2564: *
2565: ELSE
2566: *
2567: * Insufficient workspace for a fast algorithm
2568: *
2569: ITAU = 1
2570: IWORK = ITAU + M
2571: *
2572: * Compute A=L*Q, copying result to VT
2573: * (Workspace: need 2*M, prefer M+M*NB)
2574: *
2575: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2576: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2577: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2578: *
2579: * Generate Q in VT
2580: * (Workspace: need 2*M, prefer M+M*NB)
2581: *
2582: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2583: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2584: IE = ITAU
2585: ITAUQ = IE + M
2586: ITAUP = ITAUQ + M
2587: IWORK = ITAUP + M
2588: *
2589: * Zero out above L in A
2590: *
2591: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2592: $ LDA )
2593: *
2594: * Bidiagonalize L in A
2595: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2596: *
2597: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2598: $ WORK( ITAUQ ), WORK( ITAUP ),
2599: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2600: *
2601: * Multiply right vectors bidiagonalizing L by Q in VT
2602: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2603: *
2604: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2605: $ WORK( ITAUP ), VT, LDVT,
2606: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2607: *
2608: * Generate left bidiagonalizing vectors of L in A
2609: * (Workspace: need 4*M, prefer 3*M+M*NB)
2610: *
2611: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2612: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2613: IWORK = IE + M
2614: *
2615: * Perform bidiagonal QR iteration, compute left
2616: * singular vectors of A in A and compute right
2617: * singular vectors of A in VT
2618: * (Workspace: need BDSPAC)
2619: *
2620: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2621: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2622: $ INFO )
2623: *
2624: END IF
2625: *
2626: ELSE IF( WNTUAS ) THEN
2627: *
2628: * Path 6t(N much larger than M, JOBU='S' or 'A',
2629: * JOBVT='S')
2630: * M right singular vectors to be computed in VT and
2631: * M left singular vectors to be computed in U
2632: *
2633: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2634: *
2635: * Sufficient workspace for a fast algorithm
2636: *
2637: IU = 1
2638: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2639: *
2640: * WORK(IU) is LDA by N
2641: *
2642: LDWRKU = LDA
2643: ELSE
2644: *
2645: * WORK(IU) is LDA by M
2646: *
2647: LDWRKU = M
2648: END IF
2649: ITAU = IU + LDWRKU*M
2650: IWORK = ITAU + M
2651: *
2652: * Compute A=L*Q
2653: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2654: *
2655: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2656: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2657: *
2658: * Copy L to WORK(IU), zeroing out above it
2659: *
2660: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2661: $ LDWRKU )
2662: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2663: $ WORK( IU+LDWRKU ), LDWRKU )
2664: *
2665: * Generate Q in A
2666: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2667: *
2668: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2669: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2670: IE = ITAU
2671: ITAUQ = IE + M
2672: ITAUP = ITAUQ + M
2673: IWORK = ITAUP + M
2674: *
2675: * Bidiagonalize L in WORK(IU), copying result to U
2676: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2677: *
2678: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2679: $ WORK( IE ), WORK( ITAUQ ),
2680: $ WORK( ITAUP ), WORK( IWORK ),
2681: $ LWORK-IWORK+1, IERR )
2682: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2683: $ LDU )
2684: *
2685: * Generate right bidiagonalizing vectors in WORK(IU)
2686: * (Workspace: need M*M+4*M-1,
2687: * prefer M*M+3*M+(M-1)*NB)
2688: *
2689: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2690: $ WORK( ITAUP ), WORK( IWORK ),
2691: $ LWORK-IWORK+1, IERR )
2692: *
2693: * Generate left bidiagonalizing vectors in U
2694: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2695: *
2696: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2697: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2698: IWORK = IE + M
2699: *
2700: * Perform bidiagonal QR iteration, computing left
2701: * singular vectors of L in U and computing right
2702: * singular vectors of L in WORK(IU)
2703: * (Workspace: need M*M+BDSPAC)
2704: *
2705: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2706: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2707: $ WORK( IWORK ), INFO )
2708: *
2709: * Multiply right singular vectors of L in WORK(IU) by
2710: * Q in A, storing result in VT
2711: * (Workspace: need M*M)
2712: *
2713: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2714: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2715: *
2716: ELSE
2717: *
2718: * Insufficient workspace for a fast algorithm
2719: *
2720: ITAU = 1
2721: IWORK = ITAU + M
2722: *
2723: * Compute A=L*Q, copying result to VT
2724: * (Workspace: need 2*M, prefer M+M*NB)
2725: *
2726: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2727: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2728: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2729: *
2730: * Generate Q in VT
2731: * (Workspace: need 2*M, prefer M+M*NB)
2732: *
2733: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2734: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2735: *
2736: * Copy L to U, zeroing out above it
2737: *
2738: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2739: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2740: $ LDU )
2741: IE = ITAU
2742: ITAUQ = IE + M
2743: ITAUP = ITAUQ + M
2744: IWORK = ITAUP + M
2745: *
2746: * Bidiagonalize L in U
2747: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2748: *
2749: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2750: $ WORK( ITAUQ ), WORK( ITAUP ),
2751: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2752: *
2753: * Multiply right bidiagonalizing vectors in U by Q
2754: * in VT
2755: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2756: *
2757: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2758: $ WORK( ITAUP ), VT, LDVT,
2759: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2760: *
2761: * Generate left bidiagonalizing vectors in U
2762: * (Workspace: need 4*M, prefer 3*M+M*NB)
2763: *
2764: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2765: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2766: IWORK = IE + M
2767: *
2768: * Perform bidiagonal QR iteration, computing left
2769: * singular vectors of A in U and computing right
2770: * singular vectors of A in VT
2771: * (Workspace: need BDSPAC)
2772: *
2773: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2774: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2775: $ INFO )
2776: *
2777: END IF
2778: *
2779: END IF
2780: *
2781: ELSE IF( WNTVA ) THEN
2782: *
2783: IF( WNTUN ) THEN
2784: *
2785: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2786: * N right singular vectors to be computed in VT and
2787: * no left singular vectors to be computed
2788: *
2789: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2790: *
2791: * Sufficient workspace for a fast algorithm
2792: *
2793: IR = 1
2794: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2795: *
2796: * WORK(IR) is LDA by M
2797: *
2798: LDWRKR = LDA
2799: ELSE
2800: *
2801: * WORK(IR) is M by M
2802: *
2803: LDWRKR = M
2804: END IF
2805: ITAU = IR + LDWRKR*M
2806: IWORK = ITAU + M
2807: *
2808: * Compute A=L*Q, copying result to VT
2809: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2810: *
2811: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2812: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2813: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2814: *
2815: * Copy L to WORK(IR), zeroing out above it
2816: *
2817: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2818: $ LDWRKR )
2819: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2820: $ WORK( IR+LDWRKR ), LDWRKR )
2821: *
2822: * Generate Q in VT
2823: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2824: *
2825: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2826: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2827: IE = ITAU
2828: ITAUQ = IE + M
2829: ITAUP = ITAUQ + M
2830: IWORK = ITAUP + M
2831: *
2832: * Bidiagonalize L in WORK(IR)
2833: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2834: *
2835: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2836: $ WORK( IE ), WORK( ITAUQ ),
2837: $ WORK( ITAUP ), WORK( IWORK ),
2838: $ LWORK-IWORK+1, IERR )
2839: *
2840: * Generate right bidiagonalizing vectors in WORK(IR)
2841: * (Workspace: need M*M+4*M-1,
2842: * prefer M*M+3*M+(M-1)*NB)
2843: *
2844: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2845: $ WORK( ITAUP ), WORK( IWORK ),
2846: $ LWORK-IWORK+1, IERR )
2847: IWORK = IE + M
2848: *
2849: * Perform bidiagonal QR iteration, computing right
2850: * singular vectors of L in WORK(IR)
2851: * (Workspace: need M*M+BDSPAC)
2852: *
2853: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2854: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2855: $ WORK( IWORK ), INFO )
2856: *
2857: * Multiply right singular vectors of L in WORK(IR) by
2858: * Q in VT, storing result in A
2859: * (Workspace: need M*M)
2860: *
2861: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2862: $ LDWRKR, VT, LDVT, ZERO, A, LDA )
2863: *
2864: * Copy right singular vectors of A from A to VT
2865: *
2866: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2867: *
2868: ELSE
2869: *
2870: * Insufficient workspace for a fast algorithm
2871: *
2872: ITAU = 1
2873: IWORK = ITAU + M
2874: *
2875: * Compute A=L*Q, copying result to VT
2876: * (Workspace: need 2*M, prefer M+M*NB)
2877: *
2878: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2879: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2880: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2881: *
2882: * Generate Q in VT
2883: * (Workspace: need M+N, prefer M+N*NB)
2884: *
2885: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2886: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2887: IE = ITAU
2888: ITAUQ = IE + M
2889: ITAUP = ITAUQ + M
2890: IWORK = ITAUP + M
2891: *
2892: * Zero out above L in A
2893: *
2894: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2895: $ LDA )
2896: *
2897: * Bidiagonalize L in A
2898: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2899: *
2900: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2901: $ WORK( ITAUQ ), WORK( ITAUP ),
2902: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2903: *
2904: * Multiply right bidiagonalizing vectors in A by Q
2905: * in VT
2906: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2907: *
2908: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2909: $ WORK( ITAUP ), VT, LDVT,
2910: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2911: IWORK = IE + M
2912: *
2913: * Perform bidiagonal QR iteration, computing right
2914: * singular vectors of A in VT
2915: * (Workspace: need BDSPAC)
2916: *
2917: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2918: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2919: $ INFO )
2920: *
2921: END IF
2922: *
2923: ELSE IF( WNTUO ) THEN
2924: *
2925: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2926: * N right singular vectors to be computed in VT and
2927: * M left singular vectors to be overwritten on A
2928: *
2929: IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2930: *
2931: * Sufficient workspace for a fast algorithm
2932: *
2933: IU = 1
2934: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2935: *
2936: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2937: *
2938: LDWRKU = LDA
2939: IR = IU + LDWRKU*M
2940: LDWRKR = LDA
2941: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2942: *
2943: * WORK(IU) is LDA by M and WORK(IR) is M by M
2944: *
2945: LDWRKU = LDA
2946: IR = IU + LDWRKU*M
2947: LDWRKR = M
2948: ELSE
2949: *
2950: * WORK(IU) is M by M and WORK(IR) is M by M
2951: *
2952: LDWRKU = M
2953: IR = IU + LDWRKU*M
2954: LDWRKR = M
2955: END IF
2956: ITAU = IR + LDWRKR*M
2957: IWORK = ITAU + M
2958: *
2959: * Compute A=L*Q, copying result to VT
2960: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2961: *
2962: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2963: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2964: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2965: *
2966: * Generate Q in VT
2967: * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2968: *
2969: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2970: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2971: *
2972: * Copy L to WORK(IU), zeroing out above it
2973: *
2974: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2975: $ LDWRKU )
2976: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2977: $ WORK( IU+LDWRKU ), LDWRKU )
2978: IE = ITAU
2979: ITAUQ = IE + M
2980: ITAUP = ITAUQ + M
2981: IWORK = ITAUP + M
2982: *
2983: * Bidiagonalize L in WORK(IU), copying result to
2984: * WORK(IR)
2985: * (Workspace: need 2*M*M+4*M,
2986: * prefer 2*M*M+3*M+2*M*NB)
2987: *
2988: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2989: $ WORK( IE ), WORK( ITAUQ ),
2990: $ WORK( ITAUP ), WORK( IWORK ),
2991: $ LWORK-IWORK+1, IERR )
2992: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2993: $ WORK( IR ), LDWRKR )
2994: *
2995: * Generate right bidiagonalizing vectors in WORK(IU)
2996: * (Workspace: need 2*M*M+4*M-1,
2997: * prefer 2*M*M+3*M+(M-1)*NB)
2998: *
2999: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3000: $ WORK( ITAUP ), WORK( IWORK ),
3001: $ LWORK-IWORK+1, IERR )
3002: *
3003: * Generate left bidiagonalizing vectors in WORK(IR)
3004: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3005: *
3006: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3007: $ WORK( ITAUQ ), WORK( IWORK ),
3008: $ LWORK-IWORK+1, IERR )
3009: IWORK = IE + M
3010: *
3011: * Perform bidiagonal QR iteration, computing left
3012: * singular vectors of L in WORK(IR) and computing
3013: * right singular vectors of L in WORK(IU)
3014: * (Workspace: need 2*M*M+BDSPAC)
3015: *
3016: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3017: $ WORK( IU ), LDWRKU, WORK( IR ),
3018: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3019: *
3020: * Multiply right singular vectors of L in WORK(IU) by
3021: * Q in VT, storing result in A
3022: * (Workspace: need M*M)
3023: *
3024: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3025: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3026: *
3027: * Copy right singular vectors of A from A to VT
3028: *
3029: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3030: *
3031: * Copy left singular vectors of A from WORK(IR) to A
3032: *
3033: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3034: $ LDA )
3035: *
3036: ELSE
3037: *
3038: * Insufficient workspace for a fast algorithm
3039: *
3040: ITAU = 1
3041: IWORK = ITAU + M
3042: *
3043: * Compute A=L*Q, copying result to VT
3044: * (Workspace: need 2*M, prefer M+M*NB)
3045: *
3046: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3047: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3048: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3049: *
3050: * Generate Q in VT
3051: * (Workspace: need M+N, prefer M+N*NB)
3052: *
3053: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3054: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3055: IE = ITAU
3056: ITAUQ = IE + M
3057: ITAUP = ITAUQ + M
3058: IWORK = ITAUP + M
3059: *
3060: * Zero out above L in A
3061: *
3062: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3063: $ LDA )
3064: *
3065: * Bidiagonalize L in A
3066: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3067: *
3068: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3069: $ WORK( ITAUQ ), WORK( ITAUP ),
3070: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3071: *
3072: * Multiply right bidiagonalizing vectors in A by Q
3073: * in VT
3074: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3075: *
3076: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3077: $ WORK( ITAUP ), VT, LDVT,
3078: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3079: *
3080: * Generate left bidiagonalizing vectors in A
3081: * (Workspace: need 4*M, prefer 3*M+M*NB)
3082: *
3083: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3084: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3085: IWORK = IE + M
3086: *
3087: * Perform bidiagonal QR iteration, computing left
3088: * singular vectors of A in A and computing right
3089: * singular vectors of A in VT
3090: * (Workspace: need BDSPAC)
3091: *
3092: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3093: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3094: $ INFO )
3095: *
3096: END IF
3097: *
3098: ELSE IF( WNTUAS ) THEN
3099: *
3100: * Path 9t(N much larger than M, JOBU='S' or 'A',
3101: * JOBVT='A')
3102: * N right singular vectors to be computed in VT and
3103: * M left singular vectors to be computed in U
3104: *
3105: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3106: *
3107: * Sufficient workspace for a fast algorithm
3108: *
3109: IU = 1
3110: IF( LWORK.GE.WRKBL+LDA*M ) THEN
3111: *
3112: * WORK(IU) is LDA by M
3113: *
3114: LDWRKU = LDA
3115: ELSE
3116: *
3117: * WORK(IU) is M by M
3118: *
3119: LDWRKU = M
3120: END IF
3121: ITAU = IU + LDWRKU*M
3122: IWORK = ITAU + M
3123: *
3124: * Compute A=L*Q, copying result to VT
3125: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3126: *
3127: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3128: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3129: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3130: *
3131: * Generate Q in VT
3132: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3133: *
3134: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3135: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3136: *
3137: * Copy L to WORK(IU), zeroing out above it
3138: *
3139: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3140: $ LDWRKU )
3141: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3142: $ WORK( IU+LDWRKU ), LDWRKU )
3143: IE = ITAU
3144: ITAUQ = IE + M
3145: ITAUP = ITAUQ + M
3146: IWORK = ITAUP + M
3147: *
3148: * Bidiagonalize L in WORK(IU), copying result to U
3149: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3150: *
3151: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3152: $ WORK( IE ), WORK( ITAUQ ),
3153: $ WORK( ITAUP ), WORK( IWORK ),
3154: $ LWORK-IWORK+1, IERR )
3155: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3156: $ LDU )
3157: *
3158: * Generate right bidiagonalizing vectors in WORK(IU)
3159: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3160: *
3161: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3162: $ WORK( ITAUP ), WORK( IWORK ),
3163: $ LWORK-IWORK+1, IERR )
3164: *
3165: * Generate left bidiagonalizing vectors in U
3166: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3167: *
3168: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3169: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3170: IWORK = IE + M
3171: *
3172: * Perform bidiagonal QR iteration, computing left
3173: * singular vectors of L in U and computing right
3174: * singular vectors of L in WORK(IU)
3175: * (Workspace: need M*M+BDSPAC)
3176: *
3177: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3178: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3179: $ WORK( IWORK ), INFO )
3180: *
3181: * Multiply right singular vectors of L in WORK(IU) by
3182: * Q in VT, storing result in A
3183: * (Workspace: need M*M)
3184: *
3185: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3186: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3187: *
3188: * Copy right singular vectors of A from A to VT
3189: *
3190: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3191: *
3192: ELSE
3193: *
3194: * Insufficient workspace for a fast algorithm
3195: *
3196: ITAU = 1
3197: IWORK = ITAU + M
3198: *
3199: * Compute A=L*Q, copying result to VT
3200: * (Workspace: need 2*M, prefer M+M*NB)
3201: *
3202: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3203: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3204: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3205: *
3206: * Generate Q in VT
3207: * (Workspace: need M+N, prefer M+N*NB)
3208: *
3209: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3210: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3211: *
3212: * Copy L to U, zeroing out above it
3213: *
3214: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3215: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3216: $ LDU )
3217: IE = ITAU
3218: ITAUQ = IE + M
3219: ITAUP = ITAUQ + M
3220: IWORK = ITAUP + M
3221: *
3222: * Bidiagonalize L in U
3223: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3224: *
3225: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3226: $ WORK( ITAUQ ), WORK( ITAUP ),
3227: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3228: *
3229: * Multiply right bidiagonalizing vectors in U by Q
3230: * in VT
3231: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3232: *
3233: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3234: $ WORK( ITAUP ), VT, LDVT,
3235: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3236: *
3237: * Generate left bidiagonalizing vectors in U
3238: * (Workspace: need 4*M, prefer 3*M+M*NB)
3239: *
3240: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3241: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3242: IWORK = IE + M
3243: *
3244: * Perform bidiagonal QR iteration, computing left
3245: * singular vectors of A in U and computing right
3246: * singular vectors of A in VT
3247: * (Workspace: need BDSPAC)
3248: *
3249: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3250: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3251: $ INFO )
3252: *
3253: END IF
3254: *
3255: END IF
3256: *
3257: END IF
3258: *
3259: ELSE
3260: *
3261: * N .LT. MNTHR
3262: *
3263: * Path 10t(N greater than M, but not much larger)
3264: * Reduce to bidiagonal form without LQ decomposition
3265: *
3266: IE = 1
3267: ITAUQ = IE + M
3268: ITAUP = ITAUQ + M
3269: IWORK = ITAUP + M
3270: *
3271: * Bidiagonalize A
3272: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3273: *
3274: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3275: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3276: $ IERR )
3277: IF( WNTUAS ) THEN
3278: *
3279: * If left singular vectors desired in U, copy result to U
3280: * and generate left bidiagonalizing vectors in U
3281: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3282: *
3283: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3284: CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3285: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3286: END IF
3287: IF( WNTVAS ) THEN
3288: *
3289: * If right singular vectors desired in VT, copy result to
3290: * VT and generate right bidiagonalizing vectors in VT
3291: * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3292: *
3293: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3294: IF( WNTVA )
3295: $ NRVT = N
3296: IF( WNTVS )
3297: $ NRVT = M
3298: CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3299: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3300: END IF
3301: IF( WNTUO ) THEN
3302: *
3303: * If left singular vectors desired in A, generate left
3304: * bidiagonalizing vectors in A
3305: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3306: *
3307: CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3308: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3309: END IF
3310: IF( WNTVO ) THEN
3311: *
3312: * If right singular vectors desired in A, generate right
3313: * bidiagonalizing vectors in A
3314: * (Workspace: need 4*M, prefer 3*M+M*NB)
3315: *
3316: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3317: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3318: END IF
3319: IWORK = IE + M
3320: IF( WNTUAS .OR. WNTUO )
3321: $ NRU = M
3322: IF( WNTUN )
3323: $ NRU = 0
3324: IF( WNTVAS .OR. WNTVO )
3325: $ NCVT = N
3326: IF( WNTVN )
3327: $ NCVT = 0
3328: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3329: *
3330: * Perform bidiagonal QR iteration, if desired, computing
3331: * left singular vectors in U and computing right singular
3332: * vectors in VT
3333: * (Workspace: need BDSPAC)
3334: *
3335: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3336: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3337: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3338: *
3339: * Perform bidiagonal QR iteration, if desired, computing
3340: * left singular vectors in U and computing right singular
3341: * vectors in A
3342: * (Workspace: need BDSPAC)
3343: *
3344: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3345: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
3346: ELSE
3347: *
3348: * Perform bidiagonal QR iteration, if desired, computing
3349: * left singular vectors in A and computing right singular
3350: * vectors in VT
3351: * (Workspace: need BDSPAC)
3352: *
3353: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3354: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3355: END IF
3356: *
3357: END IF
3358: *
3359: END IF
3360: *
3361: * If DBDSQR failed to converge, copy unconverged superdiagonals
3362: * to WORK( 2:MINMN )
3363: *
3364: IF( INFO.NE.0 ) THEN
3365: IF( IE.GT.2 ) THEN
3366: DO 50 I = 1, MINMN - 1
3367: WORK( I+1 ) = WORK( I+IE-1 )
3368: 50 CONTINUE
3369: END IF
3370: IF( IE.LT.2 ) THEN
3371: DO 60 I = MINMN - 1, 1, -1
3372: WORK( I+1 ) = WORK( I+IE-1 )
3373: 60 CONTINUE
3374: END IF
3375: END IF
3376: *
3377: * Undo scaling if necessary
3378: *
3379: IF( ISCL.EQ.1 ) THEN
3380: IF( ANRM.GT.BIGNUM )
3381: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3382: $ IERR )
3383: IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3384: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3385: $ MINMN, IERR )
3386: IF( ANRM.LT.SMLNUM )
3387: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3388: $ IERR )
3389: IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3390: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3391: $ MINMN, IERR )
3392: END IF
3393: *
3394: * Return optimal workspace in WORK(1)
3395: *
3396: WORK( 1 ) = MAXWRK
3397: *
3398: RETURN
3399: *
3400: * End of DGESVD
3401: *
3402: END
CVSweb interface <joel.bertrand@systella.fr>