Annotation of rpl/lapack/lapack/dgesvd.f, revision 1.8
1.1 bertrand 1: SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2: $ WORK, LWORK, INFO )
3: *
1.8 ! bertrand 4: * -- LAPACK driver routine (version 3.3.1) --
1.1 bertrand 5: * -- LAPACK is a software package provided by Univ. of Tennessee, --
6: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.8 ! bertrand 7: * -- April 2011 --
1.1 bertrand 8: *
9: * .. Scalar Arguments ..
10: CHARACTER JOBU, JOBVT
11: INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
12: * ..
13: * .. Array Arguments ..
14: DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
15: $ VT( LDVT, * ), WORK( * )
16: * ..
17: *
18: * Purpose
19: * =======
20: *
21: * DGESVD computes the singular value decomposition (SVD) of a real
22: * M-by-N matrix A, optionally computing the left and/or right singular
23: * vectors. The SVD is written
24: *
25: * A = U * SIGMA * transpose(V)
26: *
27: * where SIGMA is an M-by-N matrix which is zero except for its
28: * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
29: * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
30: * are the singular values of A; they are real and non-negative, and
31: * are returned in descending order. The first min(m,n) columns of
32: * U and V are the left and right singular vectors of A.
33: *
34: * Note that the routine returns V**T, not V.
35: *
36: * Arguments
37: * =========
38: *
39: * JOBU (input) CHARACTER*1
40: * Specifies options for computing all or part of the matrix U:
41: * = 'A': all M columns of U are returned in array U:
42: * = 'S': the first min(m,n) columns of U (the left singular
43: * vectors) are returned in the array U;
44: * = 'O': the first min(m,n) columns of U (the left singular
45: * vectors) are overwritten on the array A;
46: * = 'N': no columns of U (no left singular vectors) are
47: * computed.
48: *
49: * JOBVT (input) CHARACTER*1
50: * Specifies options for computing all or part of the matrix
51: * V**T:
52: * = 'A': all N rows of V**T are returned in the array VT;
53: * = 'S': the first min(m,n) rows of V**T (the right singular
54: * vectors) are returned in the array VT;
55: * = 'O': the first min(m,n) rows of V**T (the right singular
56: * vectors) are overwritten on the array A;
57: * = 'N': no rows of V**T (no right singular vectors) are
58: * computed.
59: *
60: * JOBVT and JOBU cannot both be 'O'.
61: *
62: * M (input) INTEGER
63: * The number of rows of the input matrix A. M >= 0.
64: *
65: * N (input) INTEGER
66: * The number of columns of the input matrix A. N >= 0.
67: *
68: * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69: * On entry, the M-by-N matrix A.
70: * On exit,
71: * if JOBU = 'O', A is overwritten with the first min(m,n)
72: * columns of U (the left singular vectors,
73: * stored columnwise);
74: * if JOBVT = 'O', A is overwritten with the first min(m,n)
75: * rows of V**T (the right singular vectors,
76: * stored rowwise);
77: * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
78: * are destroyed.
79: *
80: * LDA (input) INTEGER
81: * The leading dimension of the array A. LDA >= max(1,M).
82: *
83: * S (output) DOUBLE PRECISION array, dimension (min(M,N))
84: * The singular values of A, sorted so that S(i) >= S(i+1).
85: *
86: * U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
87: * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
88: * If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
89: * if JOBU = 'S', U contains the first min(m,n) columns of U
90: * (the left singular vectors, stored columnwise);
91: * if JOBU = 'N' or 'O', U is not referenced.
92: *
93: * LDU (input) INTEGER
94: * The leading dimension of the array U. LDU >= 1; if
95: * JOBU = 'S' or 'A', LDU >= M.
96: *
97: * VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
98: * If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
99: * V**T;
100: * if JOBVT = 'S', VT contains the first min(m,n) rows of
101: * V**T (the right singular vectors, stored rowwise);
102: * if JOBVT = 'N' or 'O', VT is not referenced.
103: *
104: * LDVT (input) INTEGER
105: * The leading dimension of the array VT. LDVT >= 1; if
106: * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
107: *
108: * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
109: * On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
110: * if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
111: * superdiagonal elements of an upper bidiagonal matrix B
112: * whose diagonal is in S (not necessarily sorted). B
113: * satisfies A = U * B * VT, so it has the same singular values
114: * as A, and singular vectors related by U and VT.
115: *
116: * LWORK (input) INTEGER
117: * The dimension of the array WORK.
1.8 ! bertrand 118: * LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
! 119: * - PATH 1 (M much larger than N, JOBU='N')
! 120: * - PATH 1t (N much larger than M, JOBVT='N')
! 121: * LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
1.1 bertrand 122: * For good performance, LWORK should generally be larger.
123: *
124: * If LWORK = -1, then a workspace query is assumed; the routine
125: * only calculates the optimal size of the WORK array, returns
126: * this value as the first entry of the WORK array, and no error
127: * message related to LWORK is issued by XERBLA.
128: *
129: * INFO (output) INTEGER
130: * = 0: successful exit.
131: * < 0: if INFO = -i, the i-th argument had an illegal value.
132: * > 0: if DBDSQR did not converge, INFO specifies how many
133: * superdiagonals of an intermediate bidiagonal form B
134: * did not converge to zero. See the description of WORK
135: * above for details.
136: *
137: * =====================================================================
138: *
139: * .. Parameters ..
140: DOUBLE PRECISION ZERO, ONE
141: PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
142: * ..
143: * .. Local Scalars ..
144: LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
145: $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
146: INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
147: $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
148: $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
149: $ NRVT, WRKBL
150: DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
151: * ..
152: * .. Local Arrays ..
153: DOUBLE PRECISION DUM( 1 )
154: * ..
155: * .. External Subroutines ..
156: EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
157: $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
158: $ XERBLA
159: * ..
160: * .. External Functions ..
161: LOGICAL LSAME
162: INTEGER ILAENV
163: DOUBLE PRECISION DLAMCH, DLANGE
164: EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
165: * ..
166: * .. Intrinsic Functions ..
167: INTRINSIC MAX, MIN, SQRT
168: * ..
169: * .. Executable Statements ..
170: *
171: * Test the input arguments
172: *
173: INFO = 0
174: MINMN = MIN( M, N )
175: WNTUA = LSAME( JOBU, 'A' )
176: WNTUS = LSAME( JOBU, 'S' )
177: WNTUAS = WNTUA .OR. WNTUS
178: WNTUO = LSAME( JOBU, 'O' )
179: WNTUN = LSAME( JOBU, 'N' )
180: WNTVA = LSAME( JOBVT, 'A' )
181: WNTVS = LSAME( JOBVT, 'S' )
182: WNTVAS = WNTVA .OR. WNTVS
183: WNTVO = LSAME( JOBVT, 'O' )
184: WNTVN = LSAME( JOBVT, 'N' )
185: LQUERY = ( LWORK.EQ.-1 )
186: *
187: IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
188: INFO = -1
189: ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
190: $ ( WNTVO .AND. WNTUO ) ) THEN
191: INFO = -2
192: ELSE IF( M.LT.0 ) THEN
193: INFO = -3
194: ELSE IF( N.LT.0 ) THEN
195: INFO = -4
196: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
197: INFO = -6
198: ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
199: INFO = -9
200: ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
201: $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
202: INFO = -11
203: END IF
204: *
205: * Compute workspace
206: * (Note: Comments in the code beginning "Workspace:" describe the
207: * minimal amount of workspace needed at that point in the code,
208: * as well as the preferred amount for good performance.
209: * NB refers to the optimal block size for the immediately
210: * following subroutine, as returned by ILAENV.)
211: *
212: IF( INFO.EQ.0 ) THEN
213: MINWRK = 1
214: MAXWRK = 1
215: IF( M.GE.N .AND. MINMN.GT.0 ) THEN
216: *
217: * Compute space needed for DBDSQR
218: *
219: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
220: BDSPAC = 5*N
221: IF( M.GE.MNTHR ) THEN
222: IF( WNTUN ) THEN
223: *
224: * Path 1 (M much larger than N, JOBU='N')
225: *
226: MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
227: $ -1 )
228: MAXWRK = MAX( MAXWRK, 3*N+2*N*
229: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
230: IF( WNTVO .OR. WNTVAS )
231: $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
232: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
233: MAXWRK = MAX( MAXWRK, BDSPAC )
234: MINWRK = MAX( 4*N, BDSPAC )
235: ELSE IF( WNTUO .AND. WNTVN ) THEN
236: *
237: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
238: *
239: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
240: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
241: $ N, N, -1 ) )
242: WRKBL = MAX( WRKBL, 3*N+2*N*
243: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
244: WRKBL = MAX( WRKBL, 3*N+N*
245: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
246: WRKBL = MAX( WRKBL, BDSPAC )
247: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
248: MINWRK = MAX( 3*N+M, BDSPAC )
249: ELSE IF( WNTUO .AND. WNTVAS ) THEN
250: *
251: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
252: * 'A')
253: *
254: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
255: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
256: $ N, N, -1 ) )
257: WRKBL = MAX( WRKBL, 3*N+2*N*
258: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
259: WRKBL = MAX( WRKBL, 3*N+N*
260: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
261: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
262: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
263: WRKBL = MAX( WRKBL, BDSPAC )
264: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
265: MINWRK = MAX( 3*N+M, BDSPAC )
266: ELSE IF( WNTUS .AND. WNTVN ) THEN
267: *
268: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
269: *
270: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
271: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
272: $ N, N, -1 ) )
273: WRKBL = MAX( WRKBL, 3*N+2*N*
274: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
275: WRKBL = MAX( WRKBL, 3*N+N*
276: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
277: WRKBL = MAX( WRKBL, BDSPAC )
278: MAXWRK = N*N + WRKBL
279: MINWRK = MAX( 3*N+M, BDSPAC )
280: ELSE IF( WNTUS .AND. WNTVO ) THEN
281: *
282: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
283: *
284: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
285: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
286: $ N, N, -1 ) )
287: WRKBL = MAX( WRKBL, 3*N+2*N*
288: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
289: WRKBL = MAX( WRKBL, 3*N+N*
290: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
291: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
292: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
293: WRKBL = MAX( WRKBL, BDSPAC )
294: MAXWRK = 2*N*N + WRKBL
295: MINWRK = MAX( 3*N+M, BDSPAC )
296: ELSE IF( WNTUS .AND. WNTVAS ) THEN
297: *
298: * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
299: * 'A')
300: *
301: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
302: WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
303: $ N, N, -1 ) )
304: WRKBL = MAX( WRKBL, 3*N+2*N*
305: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
306: WRKBL = MAX( WRKBL, 3*N+N*
307: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
308: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
309: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
310: WRKBL = MAX( WRKBL, BDSPAC )
311: MAXWRK = N*N + WRKBL
312: MINWRK = MAX( 3*N+M, BDSPAC )
313: ELSE IF( WNTUA .AND. WNTVN ) THEN
314: *
315: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
316: *
317: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
318: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
319: $ M, N, -1 ) )
320: WRKBL = MAX( WRKBL, 3*N+2*N*
321: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
322: WRKBL = MAX( WRKBL, 3*N+N*
323: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
324: WRKBL = MAX( WRKBL, BDSPAC )
325: MAXWRK = N*N + WRKBL
326: MINWRK = MAX( 3*N+M, BDSPAC )
327: ELSE IF( WNTUA .AND. WNTVO ) THEN
328: *
329: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
330: *
331: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
332: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
333: $ M, N, -1 ) )
334: WRKBL = MAX( WRKBL, 3*N+2*N*
335: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
336: WRKBL = MAX( WRKBL, 3*N+N*
337: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
338: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
339: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
340: WRKBL = MAX( WRKBL, BDSPAC )
341: MAXWRK = 2*N*N + WRKBL
342: MINWRK = MAX( 3*N+M, BDSPAC )
343: ELSE IF( WNTUA .AND. WNTVAS ) THEN
344: *
345: * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
346: * 'A')
347: *
348: WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
349: WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
350: $ M, N, -1 ) )
351: WRKBL = MAX( WRKBL, 3*N+2*N*
352: $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
353: WRKBL = MAX( WRKBL, 3*N+N*
354: $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
355: WRKBL = MAX( WRKBL, 3*N+( N-1 )*
356: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
357: WRKBL = MAX( WRKBL, BDSPAC )
358: MAXWRK = N*N + WRKBL
359: MINWRK = MAX( 3*N+M, BDSPAC )
360: END IF
361: ELSE
362: *
363: * Path 10 (M at least N, but not much larger)
364: *
365: MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
366: $ -1, -1 )
367: IF( WNTUS .OR. WNTUO )
368: $ MAXWRK = MAX( MAXWRK, 3*N+N*
369: $ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
370: IF( WNTUA )
371: $ MAXWRK = MAX( MAXWRK, 3*N+M*
372: $ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
373: IF( .NOT.WNTVN )
374: $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
375: $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
376: MAXWRK = MAX( MAXWRK, BDSPAC )
377: MINWRK = MAX( 3*N+M, BDSPAC )
378: END IF
379: ELSE IF( MINMN.GT.0 ) THEN
380: *
381: * Compute space needed for DBDSQR
382: *
383: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
384: BDSPAC = 5*M
385: IF( N.GE.MNTHR ) THEN
386: IF( WNTVN ) THEN
387: *
388: * Path 1t(N much larger than M, JOBVT='N')
389: *
390: MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
391: $ -1 )
392: MAXWRK = MAX( MAXWRK, 3*M+2*M*
393: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
394: IF( WNTUO .OR. WNTUAS )
395: $ MAXWRK = MAX( MAXWRK, 3*M+M*
396: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
397: MAXWRK = MAX( MAXWRK, BDSPAC )
398: MINWRK = MAX( 4*M, BDSPAC )
399: ELSE IF( WNTVO .AND. WNTUN ) THEN
400: *
401: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
402: *
403: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
404: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
405: $ N, M, -1 ) )
406: WRKBL = MAX( WRKBL, 3*M+2*M*
407: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
408: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
409: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
410: WRKBL = MAX( WRKBL, BDSPAC )
411: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
412: MINWRK = MAX( 3*M+N, BDSPAC )
413: ELSE IF( WNTVO .AND. WNTUAS ) THEN
414: *
415: * Path 3t(N much larger than M, JOBU='S' or 'A',
416: * JOBVT='O')
417: *
418: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
419: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
420: $ N, M, -1 ) )
421: WRKBL = MAX( WRKBL, 3*M+2*M*
422: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
423: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
424: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
425: WRKBL = MAX( WRKBL, 3*M+M*
426: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
427: WRKBL = MAX( WRKBL, BDSPAC )
428: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
429: MINWRK = MAX( 3*M+N, BDSPAC )
430: ELSE IF( WNTVS .AND. WNTUN ) THEN
431: *
432: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
433: *
434: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
435: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
436: $ N, M, -1 ) )
437: WRKBL = MAX( WRKBL, 3*M+2*M*
438: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
439: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
440: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
441: WRKBL = MAX( WRKBL, BDSPAC )
442: MAXWRK = M*M + WRKBL
443: MINWRK = MAX( 3*M+N, BDSPAC )
444: ELSE IF( WNTVS .AND. WNTUO ) THEN
445: *
446: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
447: *
448: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
449: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
450: $ N, M, -1 ) )
451: WRKBL = MAX( WRKBL, 3*M+2*M*
452: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
453: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
454: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
455: WRKBL = MAX( WRKBL, 3*M+M*
456: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
457: WRKBL = MAX( WRKBL, BDSPAC )
458: MAXWRK = 2*M*M + WRKBL
459: MINWRK = MAX( 3*M+N, BDSPAC )
460: ELSE IF( WNTVS .AND. WNTUAS ) THEN
461: *
462: * Path 6t(N much larger than M, JOBU='S' or 'A',
463: * JOBVT='S')
464: *
465: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
466: WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
467: $ N, M, -1 ) )
468: WRKBL = MAX( WRKBL, 3*M+2*M*
469: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
470: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
471: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
472: WRKBL = MAX( WRKBL, 3*M+M*
473: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
474: WRKBL = MAX( WRKBL, BDSPAC )
475: MAXWRK = M*M + WRKBL
476: MINWRK = MAX( 3*M+N, BDSPAC )
477: ELSE IF( WNTVA .AND. WNTUN ) THEN
478: *
479: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
480: *
481: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
482: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
483: $ N, M, -1 ) )
484: WRKBL = MAX( WRKBL, 3*M+2*M*
485: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
486: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
487: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
488: WRKBL = MAX( WRKBL, BDSPAC )
489: MAXWRK = M*M + WRKBL
490: MINWRK = MAX( 3*M+N, BDSPAC )
491: ELSE IF( WNTVA .AND. WNTUO ) THEN
492: *
493: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
494: *
495: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
496: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
497: $ N, M, -1 ) )
498: WRKBL = MAX( WRKBL, 3*M+2*M*
499: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
500: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
501: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
502: WRKBL = MAX( WRKBL, 3*M+M*
503: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
504: WRKBL = MAX( WRKBL, BDSPAC )
505: MAXWRK = 2*M*M + WRKBL
506: MINWRK = MAX( 3*M+N, BDSPAC )
507: ELSE IF( WNTVA .AND. WNTUAS ) THEN
508: *
509: * Path 9t(N much larger than M, JOBU='S' or 'A',
510: * JOBVT='A')
511: *
512: WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
513: WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
514: $ N, M, -1 ) )
515: WRKBL = MAX( WRKBL, 3*M+2*M*
516: $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
517: WRKBL = MAX( WRKBL, 3*M+( M-1 )*
518: $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
519: WRKBL = MAX( WRKBL, 3*M+M*
520: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
521: WRKBL = MAX( WRKBL, BDSPAC )
522: MAXWRK = M*M + WRKBL
523: MINWRK = MAX( 3*M+N, BDSPAC )
524: END IF
525: ELSE
526: *
527: * Path 10t(N greater than M, but not much larger)
528: *
529: MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
530: $ -1, -1 )
531: IF( WNTVS .OR. WNTVO )
532: $ MAXWRK = MAX( MAXWRK, 3*M+M*
533: $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
534: IF( WNTVA )
535: $ MAXWRK = MAX( MAXWRK, 3*M+N*
536: $ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
537: IF( .NOT.WNTUN )
538: $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
539: $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
540: MAXWRK = MAX( MAXWRK, BDSPAC )
541: MINWRK = MAX( 3*M+N, BDSPAC )
542: END IF
543: END IF
544: MAXWRK = MAX( MAXWRK, MINWRK )
545: WORK( 1 ) = MAXWRK
546: *
547: IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
548: INFO = -13
549: END IF
550: END IF
551: *
552: IF( INFO.NE.0 ) THEN
553: CALL XERBLA( 'DGESVD', -INFO )
554: RETURN
555: ELSE IF( LQUERY ) THEN
556: RETURN
557: END IF
558: *
559: * Quick return if possible
560: *
561: IF( M.EQ.0 .OR. N.EQ.0 ) THEN
562: RETURN
563: END IF
564: *
565: * Get machine constants
566: *
567: EPS = DLAMCH( 'P' )
568: SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
569: BIGNUM = ONE / SMLNUM
570: *
571: * Scale A if max element outside range [SMLNUM,BIGNUM]
572: *
573: ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
574: ISCL = 0
575: IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
576: ISCL = 1
577: CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
578: ELSE IF( ANRM.GT.BIGNUM ) THEN
579: ISCL = 1
580: CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
581: END IF
582: *
583: IF( M.GE.N ) THEN
584: *
585: * A has at least as many rows as columns. If A has sufficiently
586: * more rows than columns, first reduce using the QR
587: * decomposition (if sufficient workspace available)
588: *
589: IF( M.GE.MNTHR ) THEN
590: *
591: IF( WNTUN ) THEN
592: *
593: * Path 1 (M much larger than N, JOBU='N')
594: * No left singular vectors to be computed
595: *
596: ITAU = 1
597: IWORK = ITAU + N
598: *
599: * Compute A=Q*R
600: * (Workspace: need 2*N, prefer N+N*NB)
601: *
602: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
603: $ LWORK-IWORK+1, IERR )
604: *
605: * Zero out below R
606: *
607: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
608: IE = 1
609: ITAUQ = IE + N
610: ITAUP = ITAUQ + N
611: IWORK = ITAUP + N
612: *
613: * Bidiagonalize R in A
614: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
615: *
616: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
617: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
618: $ IERR )
619: NCVT = 0
620: IF( WNTVO .OR. WNTVAS ) THEN
621: *
622: * If right singular vectors desired, generate P'.
623: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
624: *
625: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
626: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
627: NCVT = N
628: END IF
629: IWORK = IE + N
630: *
631: * Perform bidiagonal QR iteration, computing right
632: * singular vectors of A in A if desired
633: * (Workspace: need BDSPAC)
634: *
635: CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
636: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
637: *
638: * If right singular vectors desired in VT, copy them there
639: *
640: IF( WNTVAS )
641: $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
642: *
643: ELSE IF( WNTUO .AND. WNTVN ) THEN
644: *
645: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
646: * N left singular vectors to be overwritten on A and
647: * no right singular vectors to be computed
648: *
649: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
650: *
651: * Sufficient workspace for a fast algorithm
652: *
653: IR = 1
654: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
655: *
656: * WORK(IU) is LDA by N, WORK(IR) is LDA by N
657: *
658: LDWRKU = LDA
659: LDWRKR = LDA
660: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
661: *
662: * WORK(IU) is LDA by N, WORK(IR) is N by N
663: *
664: LDWRKU = LDA
665: LDWRKR = N
666: ELSE
667: *
668: * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
669: *
670: LDWRKU = ( LWORK-N*N-N ) / N
671: LDWRKR = N
672: END IF
673: ITAU = IR + LDWRKR*N
674: IWORK = ITAU + N
675: *
676: * Compute A=Q*R
677: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
678: *
679: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
680: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
681: *
682: * Copy R to WORK(IR) and zero out below it
683: *
684: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
685: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
686: $ LDWRKR )
687: *
688: * Generate Q in A
689: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
690: *
691: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
692: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
693: IE = ITAU
694: ITAUQ = IE + N
695: ITAUP = ITAUQ + N
696: IWORK = ITAUP + N
697: *
698: * Bidiagonalize R in WORK(IR)
699: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
700: *
701: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
702: $ WORK( ITAUQ ), WORK( ITAUP ),
703: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
704: *
705: * Generate left vectors bidiagonalizing R
706: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
707: *
708: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
709: $ WORK( ITAUQ ), WORK( IWORK ),
710: $ LWORK-IWORK+1, IERR )
711: IWORK = IE + N
712: *
713: * Perform bidiagonal QR iteration, computing left
714: * singular vectors of R in WORK(IR)
715: * (Workspace: need N*N+BDSPAC)
716: *
717: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
718: $ WORK( IR ), LDWRKR, DUM, 1,
719: $ WORK( IWORK ), INFO )
720: IU = IE + N
721: *
722: * Multiply Q in A by left singular vectors of R in
723: * WORK(IR), storing result in WORK(IU) and copying to A
724: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
725: *
726: DO 10 I = 1, M, LDWRKU
727: CHUNK = MIN( M-I+1, LDWRKU )
728: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
729: $ LDA, WORK( IR ), LDWRKR, ZERO,
730: $ WORK( IU ), LDWRKU )
731: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
732: $ A( I, 1 ), LDA )
733: 10 CONTINUE
734: *
735: ELSE
736: *
737: * Insufficient workspace for a fast algorithm
738: *
739: IE = 1
740: ITAUQ = IE + N
741: ITAUP = ITAUQ + N
742: IWORK = ITAUP + N
743: *
744: * Bidiagonalize A
745: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
746: *
747: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
748: $ WORK( ITAUQ ), WORK( ITAUP ),
749: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
750: *
751: * Generate left vectors bidiagonalizing A
752: * (Workspace: need 4*N, prefer 3*N+N*NB)
753: *
754: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
755: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
756: IWORK = IE + N
757: *
758: * Perform bidiagonal QR iteration, computing left
759: * singular vectors of A in A
760: * (Workspace: need BDSPAC)
761: *
762: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
763: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
764: *
765: END IF
766: *
767: ELSE IF( WNTUO .AND. WNTVAS ) THEN
768: *
769: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
770: * N left singular vectors to be overwritten on A and
771: * N right singular vectors to be computed in VT
772: *
773: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
774: *
775: * Sufficient workspace for a fast algorithm
776: *
777: IR = 1
778: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
779: *
780: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
781: *
782: LDWRKU = LDA
783: LDWRKR = LDA
784: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
785: *
786: * WORK(IU) is LDA by N and WORK(IR) is N by N
787: *
788: LDWRKU = LDA
789: LDWRKR = N
790: ELSE
791: *
792: * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
793: *
794: LDWRKU = ( LWORK-N*N-N ) / N
795: LDWRKR = N
796: END IF
797: ITAU = IR + LDWRKR*N
798: IWORK = ITAU + N
799: *
800: * Compute A=Q*R
801: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
802: *
803: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
804: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
805: *
806: * Copy R to VT, zeroing out below it
807: *
808: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
809: IF( N.GT.1 )
810: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
811: $ VT( 2, 1 ), LDVT )
812: *
813: * Generate Q in A
814: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
815: *
816: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
817: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
818: IE = ITAU
819: ITAUQ = IE + N
820: ITAUP = ITAUQ + N
821: IWORK = ITAUP + N
822: *
823: * Bidiagonalize R in VT, copying result to WORK(IR)
824: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
825: *
826: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
827: $ WORK( ITAUQ ), WORK( ITAUP ),
828: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
829: CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
830: *
831: * Generate left vectors bidiagonalizing R in WORK(IR)
832: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
833: *
834: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
835: $ WORK( ITAUQ ), WORK( IWORK ),
836: $ LWORK-IWORK+1, IERR )
837: *
838: * Generate right vectors bidiagonalizing R in VT
839: * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
840: *
841: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
842: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
843: IWORK = IE + N
844: *
845: * Perform bidiagonal QR iteration, computing left
846: * singular vectors of R in WORK(IR) and computing right
847: * singular vectors of R in VT
848: * (Workspace: need N*N+BDSPAC)
849: *
850: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
851: $ WORK( IR ), LDWRKR, DUM, 1,
852: $ WORK( IWORK ), INFO )
853: IU = IE + N
854: *
855: * Multiply Q in A by left singular vectors of R in
856: * WORK(IR), storing result in WORK(IU) and copying to A
857: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
858: *
859: DO 20 I = 1, M, LDWRKU
860: CHUNK = MIN( M-I+1, LDWRKU )
861: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
862: $ LDA, WORK( IR ), LDWRKR, ZERO,
863: $ WORK( IU ), LDWRKU )
864: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
865: $ A( I, 1 ), LDA )
866: 20 CONTINUE
867: *
868: ELSE
869: *
870: * Insufficient workspace for a fast algorithm
871: *
872: ITAU = 1
873: IWORK = ITAU + N
874: *
875: * Compute A=Q*R
876: * (Workspace: need 2*N, prefer N+N*NB)
877: *
878: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
879: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
880: *
881: * Copy R to VT, zeroing out below it
882: *
883: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
884: IF( N.GT.1 )
885: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
886: $ VT( 2, 1 ), LDVT )
887: *
888: * Generate Q in A
889: * (Workspace: need 2*N, prefer N+N*NB)
890: *
891: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
892: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
893: IE = ITAU
894: ITAUQ = IE + N
895: ITAUP = ITAUQ + N
896: IWORK = ITAUP + N
897: *
898: * Bidiagonalize R in VT
899: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
900: *
901: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
902: $ WORK( ITAUQ ), WORK( ITAUP ),
903: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
904: *
905: * Multiply Q in A by left vectors bidiagonalizing R
906: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
907: *
908: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
909: $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
910: $ LWORK-IWORK+1, IERR )
911: *
912: * Generate right vectors bidiagonalizing R in VT
913: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
914: *
915: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
916: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
917: IWORK = IE + N
918: *
919: * Perform bidiagonal QR iteration, computing left
920: * singular vectors of A in A and computing right
921: * singular vectors of A in VT
922: * (Workspace: need BDSPAC)
923: *
924: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
925: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
926: *
927: END IF
928: *
929: ELSE IF( WNTUS ) THEN
930: *
931: IF( WNTVN ) THEN
932: *
933: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
934: * N left singular vectors to be computed in U and
935: * no right singular vectors to be computed
936: *
937: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
938: *
939: * Sufficient workspace for a fast algorithm
940: *
941: IR = 1
942: IF( LWORK.GE.WRKBL+LDA*N ) THEN
943: *
944: * WORK(IR) is LDA by N
945: *
946: LDWRKR = LDA
947: ELSE
948: *
949: * WORK(IR) is N by N
950: *
951: LDWRKR = N
952: END IF
953: ITAU = IR + LDWRKR*N
954: IWORK = ITAU + N
955: *
956: * Compute A=Q*R
957: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
958: *
959: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
960: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
961: *
962: * Copy R to WORK(IR), zeroing out below it
963: *
964: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
965: $ LDWRKR )
966: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
967: $ WORK( IR+1 ), LDWRKR )
968: *
969: * Generate Q in A
970: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
971: *
972: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
973: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
974: IE = ITAU
975: ITAUQ = IE + N
976: ITAUP = ITAUQ + N
977: IWORK = ITAUP + N
978: *
979: * Bidiagonalize R in WORK(IR)
980: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
981: *
982: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
983: $ WORK( IE ), WORK( ITAUQ ),
984: $ WORK( ITAUP ), WORK( IWORK ),
985: $ LWORK-IWORK+1, IERR )
986: *
987: * Generate left vectors bidiagonalizing R in WORK(IR)
988: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
989: *
990: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
991: $ WORK( ITAUQ ), WORK( IWORK ),
992: $ LWORK-IWORK+1, IERR )
993: IWORK = IE + N
994: *
995: * Perform bidiagonal QR iteration, computing left
996: * singular vectors of R in WORK(IR)
997: * (Workspace: need N*N+BDSPAC)
998: *
999: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1000: $ 1, WORK( IR ), LDWRKR, DUM, 1,
1001: $ WORK( IWORK ), INFO )
1002: *
1003: * Multiply Q in A by left singular vectors of R in
1004: * WORK(IR), storing result in U
1005: * (Workspace: need N*N)
1006: *
1007: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1008: $ WORK( IR ), LDWRKR, ZERO, U, LDU )
1009: *
1010: ELSE
1011: *
1012: * Insufficient workspace for a fast algorithm
1013: *
1014: ITAU = 1
1015: IWORK = ITAU + N
1016: *
1017: * Compute A=Q*R, copying result to U
1018: * (Workspace: need 2*N, prefer N+N*NB)
1019: *
1020: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1021: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1022: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1023: *
1024: * Generate Q in U
1025: * (Workspace: need 2*N, prefer N+N*NB)
1026: *
1027: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1028: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1029: IE = ITAU
1030: ITAUQ = IE + N
1031: ITAUP = ITAUQ + N
1032: IWORK = ITAUP + N
1033: *
1034: * Zero out below R in A
1035: *
1036: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1037: $ LDA )
1038: *
1039: * Bidiagonalize R in A
1040: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1041: *
1042: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1043: $ WORK( ITAUQ ), WORK( ITAUP ),
1044: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1045: *
1046: * Multiply Q in U by left vectors bidiagonalizing R
1047: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1048: *
1049: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1050: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1051: $ LWORK-IWORK+1, IERR )
1052: IWORK = IE + N
1053: *
1054: * Perform bidiagonal QR iteration, computing left
1055: * singular vectors of A in U
1056: * (Workspace: need BDSPAC)
1057: *
1058: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1059: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1060: $ INFO )
1061: *
1062: END IF
1063: *
1064: ELSE IF( WNTVO ) THEN
1065: *
1066: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1067: * N left singular vectors to be computed in U and
1068: * N right singular vectors to be overwritten on A
1069: *
1070: IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1071: *
1072: * Sufficient workspace for a fast algorithm
1073: *
1074: IU = 1
1075: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1076: *
1077: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1078: *
1079: LDWRKU = LDA
1080: IR = IU + LDWRKU*N
1081: LDWRKR = LDA
1082: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1083: *
1084: * WORK(IU) is LDA by N and WORK(IR) is N by N
1085: *
1086: LDWRKU = LDA
1087: IR = IU + LDWRKU*N
1088: LDWRKR = N
1089: ELSE
1090: *
1091: * WORK(IU) is N by N and WORK(IR) is N by N
1092: *
1093: LDWRKU = N
1094: IR = IU + LDWRKU*N
1095: LDWRKR = N
1096: END IF
1097: ITAU = IR + LDWRKR*N
1098: IWORK = ITAU + N
1099: *
1100: * Compute A=Q*R
1101: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1102: *
1103: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1104: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1105: *
1106: * Copy R to WORK(IU), zeroing out below it
1107: *
1108: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1109: $ LDWRKU )
1110: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1111: $ WORK( IU+1 ), LDWRKU )
1112: *
1113: * Generate Q in A
1114: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1115: *
1116: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1117: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1118: IE = ITAU
1119: ITAUQ = IE + N
1120: ITAUP = ITAUQ + N
1121: IWORK = ITAUP + N
1122: *
1123: * Bidiagonalize R in WORK(IU), copying result to
1124: * WORK(IR)
1125: * (Workspace: need 2*N*N+4*N,
1126: * prefer 2*N*N+3*N+2*N*NB)
1127: *
1128: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1129: $ WORK( IE ), WORK( ITAUQ ),
1130: $ WORK( ITAUP ), WORK( IWORK ),
1131: $ LWORK-IWORK+1, IERR )
1132: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1133: $ WORK( IR ), LDWRKR )
1134: *
1135: * Generate left bidiagonalizing vectors in WORK(IU)
1136: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1137: *
1138: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1139: $ WORK( ITAUQ ), WORK( IWORK ),
1140: $ LWORK-IWORK+1, IERR )
1141: *
1142: * Generate right bidiagonalizing vectors in WORK(IR)
1143: * (Workspace: need 2*N*N+4*N-1,
1144: * prefer 2*N*N+3*N+(N-1)*NB)
1145: *
1146: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1147: $ WORK( ITAUP ), WORK( IWORK ),
1148: $ LWORK-IWORK+1, IERR )
1149: IWORK = IE + N
1150: *
1151: * Perform bidiagonal QR iteration, computing left
1152: * singular vectors of R in WORK(IU) and computing
1153: * right singular vectors of R in WORK(IR)
1154: * (Workspace: need 2*N*N+BDSPAC)
1155: *
1156: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1157: $ WORK( IR ), LDWRKR, WORK( IU ),
1158: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1159: *
1160: * Multiply Q in A by left singular vectors of R in
1161: * WORK(IU), storing result in U
1162: * (Workspace: need N*N)
1163: *
1164: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1165: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1166: *
1167: * Copy right singular vectors of R to A
1168: * (Workspace: need N*N)
1169: *
1170: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1171: $ LDA )
1172: *
1173: ELSE
1174: *
1175: * Insufficient workspace for a fast algorithm
1176: *
1177: ITAU = 1
1178: IWORK = ITAU + N
1179: *
1180: * Compute A=Q*R, copying result to U
1181: * (Workspace: need 2*N, prefer N+N*NB)
1182: *
1183: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1184: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1185: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1186: *
1187: * Generate Q in U
1188: * (Workspace: need 2*N, prefer N+N*NB)
1189: *
1190: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1191: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1192: IE = ITAU
1193: ITAUQ = IE + N
1194: ITAUP = ITAUQ + N
1195: IWORK = ITAUP + N
1196: *
1197: * Zero out below R in A
1198: *
1199: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1200: $ LDA )
1201: *
1202: * Bidiagonalize R in A
1203: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1204: *
1205: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1206: $ WORK( ITAUQ ), WORK( ITAUP ),
1207: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1208: *
1209: * Multiply Q in U by left vectors bidiagonalizing R
1210: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1211: *
1212: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1213: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1214: $ LWORK-IWORK+1, IERR )
1215: *
1216: * Generate right vectors bidiagonalizing R in A
1217: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1218: *
1219: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1220: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1221: IWORK = IE + N
1222: *
1223: * Perform bidiagonal QR iteration, computing left
1224: * singular vectors of A in U and computing right
1225: * singular vectors of A in A
1226: * (Workspace: need BDSPAC)
1227: *
1228: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1229: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1230: $ INFO )
1231: *
1232: END IF
1233: *
1234: ELSE IF( WNTVAS ) THEN
1235: *
1236: * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1237: * or 'A')
1238: * N left singular vectors to be computed in U and
1239: * N right singular vectors to be computed in VT
1240: *
1241: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1242: *
1243: * Sufficient workspace for a fast algorithm
1244: *
1245: IU = 1
1246: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1247: *
1248: * WORK(IU) is LDA by N
1249: *
1250: LDWRKU = LDA
1251: ELSE
1252: *
1253: * WORK(IU) is N by N
1254: *
1255: LDWRKU = N
1256: END IF
1257: ITAU = IU + LDWRKU*N
1258: IWORK = ITAU + N
1259: *
1260: * Compute A=Q*R
1261: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1262: *
1263: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1264: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1265: *
1266: * Copy R to WORK(IU), zeroing out below it
1267: *
1268: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1269: $ LDWRKU )
1270: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1271: $ WORK( IU+1 ), LDWRKU )
1272: *
1273: * Generate Q in A
1274: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1275: *
1276: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1277: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1278: IE = ITAU
1279: ITAUQ = IE + N
1280: ITAUP = ITAUQ + N
1281: IWORK = ITAUP + N
1282: *
1283: * Bidiagonalize R in WORK(IU), copying result to VT
1284: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1285: *
1286: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1287: $ WORK( IE ), WORK( ITAUQ ),
1288: $ WORK( ITAUP ), WORK( IWORK ),
1289: $ LWORK-IWORK+1, IERR )
1290: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1291: $ LDVT )
1292: *
1293: * Generate left bidiagonalizing vectors in WORK(IU)
1294: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1295: *
1296: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1297: $ WORK( ITAUQ ), WORK( IWORK ),
1298: $ LWORK-IWORK+1, IERR )
1299: *
1300: * Generate right bidiagonalizing vectors in VT
1301: * (Workspace: need N*N+4*N-1,
1302: * prefer N*N+3*N+(N-1)*NB)
1303: *
1304: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1305: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1306: IWORK = IE + N
1307: *
1308: * Perform bidiagonal QR iteration, computing left
1309: * singular vectors of R in WORK(IU) and computing
1310: * right singular vectors of R in VT
1311: * (Workspace: need N*N+BDSPAC)
1312: *
1313: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1314: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1315: $ WORK( IWORK ), INFO )
1316: *
1317: * Multiply Q in A by left singular vectors of R in
1318: * WORK(IU), storing result in U
1319: * (Workspace: need N*N)
1320: *
1321: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1322: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1323: *
1324: ELSE
1325: *
1326: * Insufficient workspace for a fast algorithm
1327: *
1328: ITAU = 1
1329: IWORK = ITAU + N
1330: *
1331: * Compute A=Q*R, copying result to U
1332: * (Workspace: need 2*N, prefer N+N*NB)
1333: *
1334: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1335: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1336: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1337: *
1338: * Generate Q in U
1339: * (Workspace: need 2*N, prefer N+N*NB)
1340: *
1341: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1342: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1343: *
1344: * Copy R to VT, zeroing out below it
1345: *
1346: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1347: IF( N.GT.1 )
1348: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1349: $ VT( 2, 1 ), LDVT )
1350: IE = ITAU
1351: ITAUQ = IE + N
1352: ITAUP = ITAUQ + N
1353: IWORK = ITAUP + N
1354: *
1355: * Bidiagonalize R in VT
1356: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1357: *
1358: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1359: $ WORK( ITAUQ ), WORK( ITAUP ),
1360: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1361: *
1362: * Multiply Q in U by left bidiagonalizing vectors
1363: * in VT
1364: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1365: *
1366: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1367: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1368: $ LWORK-IWORK+1, IERR )
1369: *
1370: * Generate right bidiagonalizing vectors in VT
1371: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1372: *
1373: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1374: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1375: IWORK = IE + N
1376: *
1377: * Perform bidiagonal QR iteration, computing left
1378: * singular vectors of A in U and computing right
1379: * singular vectors of A in VT
1380: * (Workspace: need BDSPAC)
1381: *
1382: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1383: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1384: $ INFO )
1385: *
1386: END IF
1387: *
1388: END IF
1389: *
1390: ELSE IF( WNTUA ) THEN
1391: *
1392: IF( WNTVN ) THEN
1393: *
1394: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1395: * M left singular vectors to be computed in U and
1396: * no right singular vectors to be computed
1397: *
1398: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1399: *
1400: * Sufficient workspace for a fast algorithm
1401: *
1402: IR = 1
1403: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1404: *
1405: * WORK(IR) is LDA by N
1406: *
1407: LDWRKR = LDA
1408: ELSE
1409: *
1410: * WORK(IR) is N by N
1411: *
1412: LDWRKR = N
1413: END IF
1414: ITAU = IR + LDWRKR*N
1415: IWORK = ITAU + N
1416: *
1417: * Compute A=Q*R, copying result to U
1418: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1419: *
1420: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1421: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1422: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1423: *
1424: * Copy R to WORK(IR), zeroing out below it
1425: *
1426: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1427: $ LDWRKR )
1428: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1429: $ WORK( IR+1 ), LDWRKR )
1430: *
1431: * Generate Q in U
1432: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1433: *
1434: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1435: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1436: IE = ITAU
1437: ITAUQ = IE + N
1438: ITAUP = ITAUQ + N
1439: IWORK = ITAUP + N
1440: *
1441: * Bidiagonalize R in WORK(IR)
1442: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1443: *
1444: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1445: $ WORK( IE ), WORK( ITAUQ ),
1446: $ WORK( ITAUP ), WORK( IWORK ),
1447: $ LWORK-IWORK+1, IERR )
1448: *
1449: * Generate left bidiagonalizing vectors in WORK(IR)
1450: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1451: *
1452: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1453: $ WORK( ITAUQ ), WORK( IWORK ),
1454: $ LWORK-IWORK+1, IERR )
1455: IWORK = IE + N
1456: *
1457: * Perform bidiagonal QR iteration, computing left
1458: * singular vectors of R in WORK(IR)
1459: * (Workspace: need N*N+BDSPAC)
1460: *
1461: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1462: $ 1, WORK( IR ), LDWRKR, DUM, 1,
1463: $ WORK( IWORK ), INFO )
1464: *
1465: * Multiply Q in U by left singular vectors of R in
1466: * WORK(IR), storing result in A
1467: * (Workspace: need N*N)
1468: *
1469: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1470: $ WORK( IR ), LDWRKR, ZERO, A, LDA )
1471: *
1472: * Copy left singular vectors of A from A to U
1473: *
1474: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1475: *
1476: ELSE
1477: *
1478: * Insufficient workspace for a fast algorithm
1479: *
1480: ITAU = 1
1481: IWORK = ITAU + N
1482: *
1483: * Compute A=Q*R, copying result to U
1484: * (Workspace: need 2*N, prefer N+N*NB)
1485: *
1486: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1487: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1488: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1489: *
1490: * Generate Q in U
1491: * (Workspace: need N+M, prefer N+M*NB)
1492: *
1493: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1494: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1495: IE = ITAU
1496: ITAUQ = IE + N
1497: ITAUP = ITAUQ + N
1498: IWORK = ITAUP + N
1499: *
1500: * Zero out below R in A
1501: *
1502: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1503: $ LDA )
1504: *
1505: * Bidiagonalize R in A
1506: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1507: *
1508: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1509: $ WORK( ITAUQ ), WORK( ITAUP ),
1510: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1511: *
1512: * Multiply Q in U by left bidiagonalizing vectors
1513: * in A
1514: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1515: *
1516: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1517: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1518: $ LWORK-IWORK+1, IERR )
1519: IWORK = IE + N
1520: *
1521: * Perform bidiagonal QR iteration, computing left
1522: * singular vectors of A in U
1523: * (Workspace: need BDSPAC)
1524: *
1525: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1526: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1527: $ INFO )
1528: *
1529: END IF
1530: *
1531: ELSE IF( WNTVO ) THEN
1532: *
1533: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1534: * M left singular vectors to be computed in U and
1535: * N right singular vectors to be overwritten on A
1536: *
1537: IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1538: *
1539: * Sufficient workspace for a fast algorithm
1540: *
1541: IU = 1
1542: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1543: *
1544: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1545: *
1546: LDWRKU = LDA
1547: IR = IU + LDWRKU*N
1548: LDWRKR = LDA
1549: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1550: *
1551: * WORK(IU) is LDA by N and WORK(IR) is N by N
1552: *
1553: LDWRKU = LDA
1554: IR = IU + LDWRKU*N
1555: LDWRKR = N
1556: ELSE
1557: *
1558: * WORK(IU) is N by N and WORK(IR) is N by N
1559: *
1560: LDWRKU = N
1561: IR = IU + LDWRKU*N
1562: LDWRKR = N
1563: END IF
1564: ITAU = IR + LDWRKR*N
1565: IWORK = ITAU + N
1566: *
1567: * Compute A=Q*R, copying result to U
1568: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1569: *
1570: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1571: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1572: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1573: *
1574: * Generate Q in U
1575: * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1576: *
1577: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1578: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1579: *
1580: * Copy R to WORK(IU), zeroing out below it
1581: *
1582: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1583: $ LDWRKU )
1584: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1585: $ WORK( IU+1 ), LDWRKU )
1586: IE = ITAU
1587: ITAUQ = IE + N
1588: ITAUP = ITAUQ + N
1589: IWORK = ITAUP + N
1590: *
1591: * Bidiagonalize R in WORK(IU), copying result to
1592: * WORK(IR)
1593: * (Workspace: need 2*N*N+4*N,
1594: * prefer 2*N*N+3*N+2*N*NB)
1595: *
1596: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1597: $ WORK( IE ), WORK( ITAUQ ),
1598: $ WORK( ITAUP ), WORK( IWORK ),
1599: $ LWORK-IWORK+1, IERR )
1600: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1601: $ WORK( IR ), LDWRKR )
1602: *
1603: * Generate left bidiagonalizing vectors in WORK(IU)
1604: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1605: *
1606: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1607: $ WORK( ITAUQ ), WORK( IWORK ),
1608: $ LWORK-IWORK+1, IERR )
1609: *
1610: * Generate right bidiagonalizing vectors in WORK(IR)
1611: * (Workspace: need 2*N*N+4*N-1,
1612: * prefer 2*N*N+3*N+(N-1)*NB)
1613: *
1614: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1615: $ WORK( ITAUP ), WORK( IWORK ),
1616: $ LWORK-IWORK+1, IERR )
1617: IWORK = IE + N
1618: *
1619: * Perform bidiagonal QR iteration, computing left
1620: * singular vectors of R in WORK(IU) and computing
1621: * right singular vectors of R in WORK(IR)
1622: * (Workspace: need 2*N*N+BDSPAC)
1623: *
1624: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1625: $ WORK( IR ), LDWRKR, WORK( IU ),
1626: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1627: *
1628: * Multiply Q in U by left singular vectors of R in
1629: * WORK(IU), storing result in A
1630: * (Workspace: need N*N)
1631: *
1632: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1633: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1634: *
1635: * Copy left singular vectors of A from A to U
1636: *
1637: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1638: *
1639: * Copy right singular vectors of R from WORK(IR) to A
1640: *
1641: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1642: $ LDA )
1643: *
1644: ELSE
1645: *
1646: * Insufficient workspace for a fast algorithm
1647: *
1648: ITAU = 1
1649: IWORK = ITAU + N
1650: *
1651: * Compute A=Q*R, copying result to U
1652: * (Workspace: need 2*N, prefer N+N*NB)
1653: *
1654: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1655: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1656: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1657: *
1658: * Generate Q in U
1659: * (Workspace: need N+M, prefer N+M*NB)
1660: *
1661: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1662: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1663: IE = ITAU
1664: ITAUQ = IE + N
1665: ITAUP = ITAUQ + N
1666: IWORK = ITAUP + N
1667: *
1668: * Zero out below R in A
1669: *
1670: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1671: $ LDA )
1672: *
1673: * Bidiagonalize R in A
1674: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1675: *
1676: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1677: $ WORK( ITAUQ ), WORK( ITAUP ),
1678: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1679: *
1680: * Multiply Q in U by left bidiagonalizing vectors
1681: * in A
1682: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1683: *
1684: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1685: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1686: $ LWORK-IWORK+1, IERR )
1687: *
1688: * Generate right bidiagonalizing vectors in A
1689: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1690: *
1691: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1692: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1693: IWORK = IE + N
1694: *
1695: * Perform bidiagonal QR iteration, computing left
1696: * singular vectors of A in U and computing right
1697: * singular vectors of A in A
1698: * (Workspace: need BDSPAC)
1699: *
1700: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1701: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1702: $ INFO )
1703: *
1704: END IF
1705: *
1706: ELSE IF( WNTVAS ) THEN
1707: *
1708: * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1709: * or 'A')
1710: * M left singular vectors to be computed in U and
1711: * N right singular vectors to be computed in VT
1712: *
1713: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1714: *
1715: * Sufficient workspace for a fast algorithm
1716: *
1717: IU = 1
1718: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1719: *
1720: * WORK(IU) is LDA by N
1721: *
1722: LDWRKU = LDA
1723: ELSE
1724: *
1725: * WORK(IU) is N by N
1726: *
1727: LDWRKU = N
1728: END IF
1729: ITAU = IU + LDWRKU*N
1730: IWORK = ITAU + N
1731: *
1732: * Compute A=Q*R, copying result to U
1733: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1734: *
1735: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1736: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1737: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1738: *
1739: * Generate Q in U
1740: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1741: *
1742: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1743: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1744: *
1745: * Copy R to WORK(IU), zeroing out below it
1746: *
1747: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1748: $ LDWRKU )
1749: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1750: $ WORK( IU+1 ), LDWRKU )
1751: IE = ITAU
1752: ITAUQ = IE + N
1753: ITAUP = ITAUQ + N
1754: IWORK = ITAUP + N
1755: *
1756: * Bidiagonalize R in WORK(IU), copying result to VT
1757: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1758: *
1759: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1760: $ WORK( IE ), WORK( ITAUQ ),
1761: $ WORK( ITAUP ), WORK( IWORK ),
1762: $ LWORK-IWORK+1, IERR )
1763: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1764: $ LDVT )
1765: *
1766: * Generate left bidiagonalizing vectors in WORK(IU)
1767: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1768: *
1769: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1770: $ WORK( ITAUQ ), WORK( IWORK ),
1771: $ LWORK-IWORK+1, IERR )
1772: *
1773: * Generate right bidiagonalizing vectors in VT
1774: * (Workspace: need N*N+4*N-1,
1775: * prefer N*N+3*N+(N-1)*NB)
1776: *
1777: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1778: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1779: IWORK = IE + N
1780: *
1781: * Perform bidiagonal QR iteration, computing left
1782: * singular vectors of R in WORK(IU) and computing
1783: * right singular vectors of R in VT
1784: * (Workspace: need N*N+BDSPAC)
1785: *
1786: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1787: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1788: $ WORK( IWORK ), INFO )
1789: *
1790: * Multiply Q in U by left singular vectors of R in
1791: * WORK(IU), storing result in A
1792: * (Workspace: need N*N)
1793: *
1794: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1795: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1796: *
1797: * Copy left singular vectors of A from A to U
1798: *
1799: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1800: *
1801: ELSE
1802: *
1803: * Insufficient workspace for a fast algorithm
1804: *
1805: ITAU = 1
1806: IWORK = ITAU + N
1807: *
1808: * Compute A=Q*R, copying result to U
1809: * (Workspace: need 2*N, prefer N+N*NB)
1810: *
1811: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1812: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1813: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1814: *
1815: * Generate Q in U
1816: * (Workspace: need N+M, prefer N+M*NB)
1817: *
1818: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1819: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1820: *
1821: * Copy R from A to VT, zeroing out below it
1822: *
1823: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1824: IF( N.GT.1 )
1825: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1826: $ VT( 2, 1 ), LDVT )
1827: IE = ITAU
1828: ITAUQ = IE + N
1829: ITAUP = ITAUQ + N
1830: IWORK = ITAUP + N
1831: *
1832: * Bidiagonalize R in VT
1833: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1834: *
1835: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1836: $ WORK( ITAUQ ), WORK( ITAUP ),
1837: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1838: *
1839: * Multiply Q in U by left bidiagonalizing vectors
1840: * in VT
1841: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1842: *
1843: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1844: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1845: $ LWORK-IWORK+1, IERR )
1846: *
1847: * Generate right bidiagonalizing vectors in VT
1848: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1849: *
1850: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1851: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1852: IWORK = IE + N
1853: *
1854: * Perform bidiagonal QR iteration, computing left
1855: * singular vectors of A in U and computing right
1856: * singular vectors of A in VT
1857: * (Workspace: need BDSPAC)
1858: *
1859: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1860: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1861: $ INFO )
1862: *
1863: END IF
1864: *
1865: END IF
1866: *
1867: END IF
1868: *
1869: ELSE
1870: *
1871: * M .LT. MNTHR
1872: *
1873: * Path 10 (M at least N, but not much larger)
1874: * Reduce to bidiagonal form without QR decomposition
1875: *
1876: IE = 1
1877: ITAUQ = IE + N
1878: ITAUP = ITAUQ + N
1879: IWORK = ITAUP + N
1880: *
1881: * Bidiagonalize A
1882: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1883: *
1884: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1885: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1886: $ IERR )
1887: IF( WNTUAS ) THEN
1888: *
1889: * If left singular vectors desired in U, copy result to U
1890: * and generate left bidiagonalizing vectors in U
1891: * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1892: *
1893: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1894: IF( WNTUS )
1895: $ NCU = N
1896: IF( WNTUA )
1897: $ NCU = M
1898: CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1899: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1900: END IF
1901: IF( WNTVAS ) THEN
1902: *
1903: * If right singular vectors desired in VT, copy result to
1904: * VT and generate right bidiagonalizing vectors in VT
1905: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1906: *
1907: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1908: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1909: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1910: END IF
1911: IF( WNTUO ) THEN
1912: *
1913: * If left singular vectors desired in A, generate left
1914: * bidiagonalizing vectors in A
1915: * (Workspace: need 4*N, prefer 3*N+N*NB)
1916: *
1917: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1918: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1919: END IF
1920: IF( WNTVO ) THEN
1921: *
1922: * If right singular vectors desired in A, generate right
1923: * bidiagonalizing vectors in A
1924: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1925: *
1926: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1927: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1928: END IF
1929: IWORK = IE + N
1930: IF( WNTUAS .OR. WNTUO )
1931: $ NRU = M
1932: IF( WNTUN )
1933: $ NRU = 0
1934: IF( WNTVAS .OR. WNTVO )
1935: $ NCVT = N
1936: IF( WNTVN )
1937: $ NCVT = 0
1938: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1939: *
1940: * Perform bidiagonal QR iteration, if desired, computing
1941: * left singular vectors in U and computing right singular
1942: * vectors in VT
1943: * (Workspace: need BDSPAC)
1944: *
1945: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1946: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1947: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1948: *
1949: * Perform bidiagonal QR iteration, if desired, computing
1950: * left singular vectors in U and computing right singular
1951: * vectors in A
1952: * (Workspace: need BDSPAC)
1953: *
1954: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1955: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
1956: ELSE
1957: *
1958: * Perform bidiagonal QR iteration, if desired, computing
1959: * left singular vectors in A and computing right singular
1960: * vectors in VT
1961: * (Workspace: need BDSPAC)
1962: *
1963: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1964: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1965: END IF
1966: *
1967: END IF
1968: *
1969: ELSE
1970: *
1971: * A has more columns than rows. If A has sufficiently more
1972: * columns than rows, first reduce using the LQ decomposition (if
1973: * sufficient workspace available)
1974: *
1975: IF( N.GE.MNTHR ) THEN
1976: *
1977: IF( WNTVN ) THEN
1978: *
1979: * Path 1t(N much larger than M, JOBVT='N')
1980: * No right singular vectors to be computed
1981: *
1982: ITAU = 1
1983: IWORK = ITAU + M
1984: *
1985: * Compute A=L*Q
1986: * (Workspace: need 2*M, prefer M+M*NB)
1987: *
1988: CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1989: $ LWORK-IWORK+1, IERR )
1990: *
1991: * Zero out above L
1992: *
1993: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1994: IE = 1
1995: ITAUQ = IE + M
1996: ITAUP = ITAUQ + M
1997: IWORK = ITAUP + M
1998: *
1999: * Bidiagonalize L in A
2000: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2001: *
2002: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2003: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2004: $ IERR )
2005: IF( WNTUO .OR. WNTUAS ) THEN
2006: *
2007: * If left singular vectors desired, generate Q
2008: * (Workspace: need 4*M, prefer 3*M+M*NB)
2009: *
2010: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2011: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2012: END IF
2013: IWORK = IE + M
2014: NRU = 0
2015: IF( WNTUO .OR. WNTUAS )
2016: $ NRU = M
2017: *
2018: * Perform bidiagonal QR iteration, computing left singular
2019: * vectors of A in A if desired
2020: * (Workspace: need BDSPAC)
2021: *
2022: CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2023: $ LDA, DUM, 1, WORK( IWORK ), INFO )
2024: *
2025: * If left singular vectors desired in U, copy them there
2026: *
2027: IF( WNTUAS )
2028: $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2029: *
2030: ELSE IF( WNTVO .AND. WNTUN ) THEN
2031: *
2032: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2033: * M right singular vectors to be overwritten on A and
2034: * no left singular vectors to be computed
2035: *
2036: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2037: *
2038: * Sufficient workspace for a fast algorithm
2039: *
2040: IR = 1
2041: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2042: *
2043: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2044: *
2045: LDWRKU = LDA
2046: CHUNK = N
2047: LDWRKR = LDA
2048: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2049: *
2050: * WORK(IU) is LDA by N and WORK(IR) is M by M
2051: *
2052: LDWRKU = LDA
2053: CHUNK = N
2054: LDWRKR = M
2055: ELSE
2056: *
2057: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2058: *
2059: LDWRKU = M
2060: CHUNK = ( LWORK-M*M-M ) / M
2061: LDWRKR = M
2062: END IF
2063: ITAU = IR + LDWRKR*M
2064: IWORK = ITAU + M
2065: *
2066: * Compute A=L*Q
2067: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2068: *
2069: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2070: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2071: *
2072: * Copy L to WORK(IR) and zero out above it
2073: *
2074: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2075: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2076: $ WORK( IR+LDWRKR ), LDWRKR )
2077: *
2078: * Generate Q in A
2079: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2080: *
2081: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2082: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2083: IE = ITAU
2084: ITAUQ = IE + M
2085: ITAUP = ITAUQ + M
2086: IWORK = ITAUP + M
2087: *
2088: * Bidiagonalize L in WORK(IR)
2089: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2090: *
2091: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2092: $ WORK( ITAUQ ), WORK( ITAUP ),
2093: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2094: *
2095: * Generate right vectors bidiagonalizing L
2096: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2097: *
2098: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2099: $ WORK( ITAUP ), WORK( IWORK ),
2100: $ LWORK-IWORK+1, IERR )
2101: IWORK = IE + M
2102: *
2103: * Perform bidiagonal QR iteration, computing right
2104: * singular vectors of L in WORK(IR)
2105: * (Workspace: need M*M+BDSPAC)
2106: *
2107: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2108: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2109: $ WORK( IWORK ), INFO )
2110: IU = IE + M
2111: *
2112: * Multiply right singular vectors of L in WORK(IR) by Q
2113: * in A, storing result in WORK(IU) and copying to A
2114: * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2115: *
2116: DO 30 I = 1, N, CHUNK
2117: BLK = MIN( N-I+1, CHUNK )
2118: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2119: $ LDWRKR, A( 1, I ), LDA, ZERO,
2120: $ WORK( IU ), LDWRKU )
2121: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2122: $ A( 1, I ), LDA )
2123: 30 CONTINUE
2124: *
2125: ELSE
2126: *
2127: * Insufficient workspace for a fast algorithm
2128: *
2129: IE = 1
2130: ITAUQ = IE + M
2131: ITAUP = ITAUQ + M
2132: IWORK = ITAUP + M
2133: *
2134: * Bidiagonalize A
2135: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2136: *
2137: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2138: $ WORK( ITAUQ ), WORK( ITAUP ),
2139: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2140: *
2141: * Generate right vectors bidiagonalizing A
2142: * (Workspace: need 4*M, prefer 3*M+M*NB)
2143: *
2144: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2145: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2146: IWORK = IE + M
2147: *
2148: * Perform bidiagonal QR iteration, computing right
2149: * singular vectors of A in A
2150: * (Workspace: need BDSPAC)
2151: *
2152: CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2153: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2154: *
2155: END IF
2156: *
2157: ELSE IF( WNTVO .AND. WNTUAS ) THEN
2158: *
2159: * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2160: * M right singular vectors to be overwritten on A and
2161: * M left singular vectors to be computed in U
2162: *
2163: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2164: *
2165: * Sufficient workspace for a fast algorithm
2166: *
2167: IR = 1
2168: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2169: *
2170: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2171: *
2172: LDWRKU = LDA
2173: CHUNK = N
2174: LDWRKR = LDA
2175: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2176: *
2177: * WORK(IU) is LDA by N and WORK(IR) is M by M
2178: *
2179: LDWRKU = LDA
2180: CHUNK = N
2181: LDWRKR = M
2182: ELSE
2183: *
2184: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2185: *
2186: LDWRKU = M
2187: CHUNK = ( LWORK-M*M-M ) / M
2188: LDWRKR = M
2189: END IF
2190: ITAU = IR + LDWRKR*M
2191: IWORK = ITAU + M
2192: *
2193: * Compute A=L*Q
2194: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2195: *
2196: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2197: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2198: *
2199: * Copy L to U, zeroing about above it
2200: *
2201: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2202: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2203: $ LDU )
2204: *
2205: * Generate Q in A
2206: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2207: *
2208: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2209: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2210: IE = ITAU
2211: ITAUQ = IE + M
2212: ITAUP = ITAUQ + M
2213: IWORK = ITAUP + M
2214: *
2215: * Bidiagonalize L in U, copying result to WORK(IR)
2216: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2217: *
2218: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2219: $ WORK( ITAUQ ), WORK( ITAUP ),
2220: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2221: CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2222: *
2223: * Generate right vectors bidiagonalizing L in WORK(IR)
2224: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2225: *
2226: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2227: $ WORK( ITAUP ), WORK( IWORK ),
2228: $ LWORK-IWORK+1, IERR )
2229: *
2230: * Generate left vectors bidiagonalizing L in U
2231: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2232: *
2233: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2234: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2235: IWORK = IE + M
2236: *
2237: * Perform bidiagonal QR iteration, computing left
2238: * singular vectors of L in U, and computing right
2239: * singular vectors of L in WORK(IR)
2240: * (Workspace: need M*M+BDSPAC)
2241: *
2242: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2243: $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2244: $ WORK( IWORK ), INFO )
2245: IU = IE + M
2246: *
2247: * Multiply right singular vectors of L in WORK(IR) by Q
2248: * in A, storing result in WORK(IU) and copying to A
2249: * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2250: *
2251: DO 40 I = 1, N, CHUNK
2252: BLK = MIN( N-I+1, CHUNK )
2253: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2254: $ LDWRKR, A( 1, I ), LDA, ZERO,
2255: $ WORK( IU ), LDWRKU )
2256: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2257: $ A( 1, I ), LDA )
2258: 40 CONTINUE
2259: *
2260: ELSE
2261: *
2262: * Insufficient workspace for a fast algorithm
2263: *
2264: ITAU = 1
2265: IWORK = ITAU + M
2266: *
2267: * Compute A=L*Q
2268: * (Workspace: need 2*M, prefer M+M*NB)
2269: *
2270: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2271: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2272: *
2273: * Copy L to U, zeroing out above it
2274: *
2275: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2276: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2277: $ LDU )
2278: *
2279: * Generate Q in A
2280: * (Workspace: need 2*M, prefer M+M*NB)
2281: *
2282: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2283: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2284: IE = ITAU
2285: ITAUQ = IE + M
2286: ITAUP = ITAUQ + M
2287: IWORK = ITAUP + M
2288: *
2289: * Bidiagonalize L in U
2290: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2291: *
2292: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2293: $ WORK( ITAUQ ), WORK( ITAUP ),
2294: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2295: *
2296: * Multiply right vectors bidiagonalizing L by Q in A
2297: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2298: *
2299: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2300: $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2301: $ LWORK-IWORK+1, IERR )
2302: *
2303: * Generate left vectors bidiagonalizing L in U
2304: * (Workspace: need 4*M, prefer 3*M+M*NB)
2305: *
2306: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2307: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2308: IWORK = IE + M
2309: *
2310: * Perform bidiagonal QR iteration, computing left
2311: * singular vectors of A in U and computing right
2312: * singular vectors of A in A
2313: * (Workspace: need BDSPAC)
2314: *
2315: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2316: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2317: *
2318: END IF
2319: *
2320: ELSE IF( WNTVS ) THEN
2321: *
2322: IF( WNTUN ) THEN
2323: *
2324: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2325: * M right singular vectors to be computed in VT and
2326: * no left singular vectors to be computed
2327: *
2328: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2329: *
2330: * Sufficient workspace for a fast algorithm
2331: *
2332: IR = 1
2333: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2334: *
2335: * WORK(IR) is LDA by M
2336: *
2337: LDWRKR = LDA
2338: ELSE
2339: *
2340: * WORK(IR) is M by M
2341: *
2342: LDWRKR = M
2343: END IF
2344: ITAU = IR + LDWRKR*M
2345: IWORK = ITAU + M
2346: *
2347: * Compute A=L*Q
2348: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2349: *
2350: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2351: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2352: *
2353: * Copy L to WORK(IR), zeroing out above it
2354: *
2355: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2356: $ LDWRKR )
2357: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2358: $ WORK( IR+LDWRKR ), LDWRKR )
2359: *
2360: * Generate Q in A
2361: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2362: *
2363: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2364: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2365: IE = ITAU
2366: ITAUQ = IE + M
2367: ITAUP = ITAUQ + M
2368: IWORK = ITAUP + M
2369: *
2370: * Bidiagonalize L in WORK(IR)
2371: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2372: *
2373: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2374: $ WORK( IE ), WORK( ITAUQ ),
2375: $ WORK( ITAUP ), WORK( IWORK ),
2376: $ LWORK-IWORK+1, IERR )
2377: *
2378: * Generate right vectors bidiagonalizing L in
2379: * WORK(IR)
2380: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2381: *
2382: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2383: $ WORK( ITAUP ), WORK( IWORK ),
2384: $ LWORK-IWORK+1, IERR )
2385: IWORK = IE + M
2386: *
2387: * Perform bidiagonal QR iteration, computing right
2388: * singular vectors of L in WORK(IR)
2389: * (Workspace: need M*M+BDSPAC)
2390: *
2391: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2392: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2393: $ WORK( IWORK ), INFO )
2394: *
2395: * Multiply right singular vectors of L in WORK(IR) by
2396: * Q in A, storing result in VT
2397: * (Workspace: need M*M)
2398: *
2399: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2400: $ LDWRKR, A, LDA, ZERO, VT, LDVT )
2401: *
2402: ELSE
2403: *
2404: * Insufficient workspace for a fast algorithm
2405: *
2406: ITAU = 1
2407: IWORK = ITAU + M
2408: *
2409: * Compute A=L*Q
2410: * (Workspace: need 2*M, prefer M+M*NB)
2411: *
2412: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2413: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2414: *
2415: * Copy result to VT
2416: *
2417: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2418: *
2419: * Generate Q in VT
2420: * (Workspace: need 2*M, prefer M+M*NB)
2421: *
2422: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2423: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2424: IE = ITAU
2425: ITAUQ = IE + M
2426: ITAUP = ITAUQ + M
2427: IWORK = ITAUP + M
2428: *
2429: * Zero out above L in A
2430: *
2431: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2432: $ LDA )
2433: *
2434: * Bidiagonalize L in A
2435: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2436: *
2437: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2438: $ WORK( ITAUQ ), WORK( ITAUP ),
2439: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2440: *
2441: * Multiply right vectors bidiagonalizing L by Q in VT
2442: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2443: *
2444: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2445: $ WORK( ITAUP ), VT, LDVT,
2446: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2447: IWORK = IE + M
2448: *
2449: * Perform bidiagonal QR iteration, computing right
2450: * singular vectors of A in VT
2451: * (Workspace: need BDSPAC)
2452: *
2453: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2454: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2455: $ INFO )
2456: *
2457: END IF
2458: *
2459: ELSE IF( WNTUO ) THEN
2460: *
2461: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2462: * M right singular vectors to be computed in VT and
2463: * M left singular vectors to be overwritten on A
2464: *
2465: IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2466: *
2467: * Sufficient workspace for a fast algorithm
2468: *
2469: IU = 1
2470: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2471: *
2472: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2473: *
2474: LDWRKU = LDA
2475: IR = IU + LDWRKU*M
2476: LDWRKR = LDA
2477: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2478: *
2479: * WORK(IU) is LDA by M and WORK(IR) is M by M
2480: *
2481: LDWRKU = LDA
2482: IR = IU + LDWRKU*M
2483: LDWRKR = M
2484: ELSE
2485: *
2486: * WORK(IU) is M by M and WORK(IR) is M by M
2487: *
2488: LDWRKU = M
2489: IR = IU + LDWRKU*M
2490: LDWRKR = M
2491: END IF
2492: ITAU = IR + LDWRKR*M
2493: IWORK = ITAU + M
2494: *
2495: * Compute A=L*Q
2496: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2497: *
2498: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2499: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2500: *
2501: * Copy L to WORK(IU), zeroing out below it
2502: *
2503: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2504: $ LDWRKU )
2505: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2506: $ WORK( IU+LDWRKU ), LDWRKU )
2507: *
2508: * Generate Q in A
2509: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2510: *
2511: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2512: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2513: IE = ITAU
2514: ITAUQ = IE + M
2515: ITAUP = ITAUQ + M
2516: IWORK = ITAUP + M
2517: *
2518: * Bidiagonalize L in WORK(IU), copying result to
2519: * WORK(IR)
2520: * (Workspace: need 2*M*M+4*M,
2521: * prefer 2*M*M+3*M+2*M*NB)
2522: *
2523: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2524: $ WORK( IE ), WORK( ITAUQ ),
2525: $ WORK( ITAUP ), WORK( IWORK ),
2526: $ LWORK-IWORK+1, IERR )
2527: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2528: $ WORK( IR ), LDWRKR )
2529: *
2530: * Generate right bidiagonalizing vectors in WORK(IU)
2531: * (Workspace: need 2*M*M+4*M-1,
2532: * prefer 2*M*M+3*M+(M-1)*NB)
2533: *
2534: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2535: $ WORK( ITAUP ), WORK( IWORK ),
2536: $ LWORK-IWORK+1, IERR )
2537: *
2538: * Generate left bidiagonalizing vectors in WORK(IR)
2539: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2540: *
2541: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2542: $ WORK( ITAUQ ), WORK( IWORK ),
2543: $ LWORK-IWORK+1, IERR )
2544: IWORK = IE + M
2545: *
2546: * Perform bidiagonal QR iteration, computing left
2547: * singular vectors of L in WORK(IR) and computing
2548: * right singular vectors of L in WORK(IU)
2549: * (Workspace: need 2*M*M+BDSPAC)
2550: *
2551: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2552: $ WORK( IU ), LDWRKU, WORK( IR ),
2553: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2554: *
2555: * Multiply right singular vectors of L in WORK(IU) by
2556: * Q in A, storing result in VT
2557: * (Workspace: need M*M)
2558: *
2559: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2560: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2561: *
2562: * Copy left singular vectors of L to A
2563: * (Workspace: need M*M)
2564: *
2565: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2566: $ LDA )
2567: *
2568: ELSE
2569: *
2570: * Insufficient workspace for a fast algorithm
2571: *
2572: ITAU = 1
2573: IWORK = ITAU + M
2574: *
2575: * Compute A=L*Q, copying result to VT
2576: * (Workspace: need 2*M, prefer M+M*NB)
2577: *
2578: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2579: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2580: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2581: *
2582: * Generate Q in VT
2583: * (Workspace: need 2*M, prefer M+M*NB)
2584: *
2585: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2586: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2587: IE = ITAU
2588: ITAUQ = IE + M
2589: ITAUP = ITAUQ + M
2590: IWORK = ITAUP + M
2591: *
2592: * Zero out above L in A
2593: *
2594: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2595: $ LDA )
2596: *
2597: * Bidiagonalize L in A
2598: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2599: *
2600: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2601: $ WORK( ITAUQ ), WORK( ITAUP ),
2602: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2603: *
2604: * Multiply right vectors bidiagonalizing L by Q in VT
2605: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2606: *
2607: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2608: $ WORK( ITAUP ), VT, LDVT,
2609: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2610: *
2611: * Generate left bidiagonalizing vectors of L in A
2612: * (Workspace: need 4*M, prefer 3*M+M*NB)
2613: *
2614: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2615: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2616: IWORK = IE + M
2617: *
2618: * Perform bidiagonal QR iteration, compute left
2619: * singular vectors of A in A and compute right
2620: * singular vectors of A in VT
2621: * (Workspace: need BDSPAC)
2622: *
2623: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2624: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2625: $ INFO )
2626: *
2627: END IF
2628: *
2629: ELSE IF( WNTUAS ) THEN
2630: *
2631: * Path 6t(N much larger than M, JOBU='S' or 'A',
2632: * JOBVT='S')
2633: * M right singular vectors to be computed in VT and
2634: * M left singular vectors to be computed in U
2635: *
2636: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2637: *
2638: * Sufficient workspace for a fast algorithm
2639: *
2640: IU = 1
2641: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2642: *
2643: * WORK(IU) is LDA by N
2644: *
2645: LDWRKU = LDA
2646: ELSE
2647: *
2648: * WORK(IU) is LDA by M
2649: *
2650: LDWRKU = M
2651: END IF
2652: ITAU = IU + LDWRKU*M
2653: IWORK = ITAU + M
2654: *
2655: * Compute A=L*Q
2656: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2657: *
2658: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2659: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2660: *
2661: * Copy L to WORK(IU), zeroing out above it
2662: *
2663: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2664: $ LDWRKU )
2665: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2666: $ WORK( IU+LDWRKU ), LDWRKU )
2667: *
2668: * Generate Q in A
2669: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2670: *
2671: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2672: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2673: IE = ITAU
2674: ITAUQ = IE + M
2675: ITAUP = ITAUQ + M
2676: IWORK = ITAUP + M
2677: *
2678: * Bidiagonalize L in WORK(IU), copying result to U
2679: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2680: *
2681: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2682: $ WORK( IE ), WORK( ITAUQ ),
2683: $ WORK( ITAUP ), WORK( IWORK ),
2684: $ LWORK-IWORK+1, IERR )
2685: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2686: $ LDU )
2687: *
2688: * Generate right bidiagonalizing vectors in WORK(IU)
2689: * (Workspace: need M*M+4*M-1,
2690: * prefer M*M+3*M+(M-1)*NB)
2691: *
2692: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2693: $ WORK( ITAUP ), WORK( IWORK ),
2694: $ LWORK-IWORK+1, IERR )
2695: *
2696: * Generate left bidiagonalizing vectors in U
2697: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2698: *
2699: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2700: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2701: IWORK = IE + M
2702: *
2703: * Perform bidiagonal QR iteration, computing left
2704: * singular vectors of L in U and computing right
2705: * singular vectors of L in WORK(IU)
2706: * (Workspace: need M*M+BDSPAC)
2707: *
2708: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2709: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2710: $ WORK( IWORK ), INFO )
2711: *
2712: * Multiply right singular vectors of L in WORK(IU) by
2713: * Q in A, storing result in VT
2714: * (Workspace: need M*M)
2715: *
2716: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2717: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2718: *
2719: ELSE
2720: *
2721: * Insufficient workspace for a fast algorithm
2722: *
2723: ITAU = 1
2724: IWORK = ITAU + M
2725: *
2726: * Compute A=L*Q, copying result to VT
2727: * (Workspace: need 2*M, prefer M+M*NB)
2728: *
2729: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2730: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2731: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2732: *
2733: * Generate Q in VT
2734: * (Workspace: need 2*M, prefer M+M*NB)
2735: *
2736: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2737: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2738: *
2739: * Copy L to U, zeroing out above it
2740: *
2741: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2742: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2743: $ LDU )
2744: IE = ITAU
2745: ITAUQ = IE + M
2746: ITAUP = ITAUQ + M
2747: IWORK = ITAUP + M
2748: *
2749: * Bidiagonalize L in U
2750: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2751: *
2752: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2753: $ WORK( ITAUQ ), WORK( ITAUP ),
2754: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2755: *
2756: * Multiply right bidiagonalizing vectors in U by Q
2757: * in VT
2758: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2759: *
2760: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2761: $ WORK( ITAUP ), VT, LDVT,
2762: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2763: *
2764: * Generate left bidiagonalizing vectors in U
2765: * (Workspace: need 4*M, prefer 3*M+M*NB)
2766: *
2767: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2768: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2769: IWORK = IE + M
2770: *
2771: * Perform bidiagonal QR iteration, computing left
2772: * singular vectors of A in U and computing right
2773: * singular vectors of A in VT
2774: * (Workspace: need BDSPAC)
2775: *
2776: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2777: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2778: $ INFO )
2779: *
2780: END IF
2781: *
2782: END IF
2783: *
2784: ELSE IF( WNTVA ) THEN
2785: *
2786: IF( WNTUN ) THEN
2787: *
2788: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2789: * N right singular vectors to be computed in VT and
2790: * no left singular vectors to be computed
2791: *
2792: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2793: *
2794: * Sufficient workspace for a fast algorithm
2795: *
2796: IR = 1
2797: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2798: *
2799: * WORK(IR) is LDA by M
2800: *
2801: LDWRKR = LDA
2802: ELSE
2803: *
2804: * WORK(IR) is M by M
2805: *
2806: LDWRKR = M
2807: END IF
2808: ITAU = IR + LDWRKR*M
2809: IWORK = ITAU + M
2810: *
2811: * Compute A=L*Q, copying result to VT
2812: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2813: *
2814: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2815: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2816: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2817: *
2818: * Copy L to WORK(IR), zeroing out above it
2819: *
2820: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2821: $ LDWRKR )
2822: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2823: $ WORK( IR+LDWRKR ), LDWRKR )
2824: *
2825: * Generate Q in VT
2826: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2827: *
2828: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2829: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2830: IE = ITAU
2831: ITAUQ = IE + M
2832: ITAUP = ITAUQ + M
2833: IWORK = ITAUP + M
2834: *
2835: * Bidiagonalize L in WORK(IR)
2836: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2837: *
2838: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2839: $ WORK( IE ), WORK( ITAUQ ),
2840: $ WORK( ITAUP ), WORK( IWORK ),
2841: $ LWORK-IWORK+1, IERR )
2842: *
2843: * Generate right bidiagonalizing vectors in WORK(IR)
2844: * (Workspace: need M*M+4*M-1,
2845: * prefer M*M+3*M+(M-1)*NB)
2846: *
2847: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2848: $ WORK( ITAUP ), WORK( IWORK ),
2849: $ LWORK-IWORK+1, IERR )
2850: IWORK = IE + M
2851: *
2852: * Perform bidiagonal QR iteration, computing right
2853: * singular vectors of L in WORK(IR)
2854: * (Workspace: need M*M+BDSPAC)
2855: *
2856: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2857: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2858: $ WORK( IWORK ), INFO )
2859: *
2860: * Multiply right singular vectors of L in WORK(IR) by
2861: * Q in VT, storing result in A
2862: * (Workspace: need M*M)
2863: *
2864: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2865: $ LDWRKR, VT, LDVT, ZERO, A, LDA )
2866: *
2867: * Copy right singular vectors of A from A to VT
2868: *
2869: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2870: *
2871: ELSE
2872: *
2873: * Insufficient workspace for a fast algorithm
2874: *
2875: ITAU = 1
2876: IWORK = ITAU + M
2877: *
2878: * Compute A=L*Q, copying result to VT
2879: * (Workspace: need 2*M, prefer M+M*NB)
2880: *
2881: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2882: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2883: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2884: *
2885: * Generate Q in VT
2886: * (Workspace: need M+N, prefer M+N*NB)
2887: *
2888: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2889: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2890: IE = ITAU
2891: ITAUQ = IE + M
2892: ITAUP = ITAUQ + M
2893: IWORK = ITAUP + M
2894: *
2895: * Zero out above L in A
2896: *
2897: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2898: $ LDA )
2899: *
2900: * Bidiagonalize L in A
2901: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2902: *
2903: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2904: $ WORK( ITAUQ ), WORK( ITAUP ),
2905: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2906: *
2907: * Multiply right bidiagonalizing vectors in A by Q
2908: * in VT
2909: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2910: *
2911: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2912: $ WORK( ITAUP ), VT, LDVT,
2913: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2914: IWORK = IE + M
2915: *
2916: * Perform bidiagonal QR iteration, computing right
2917: * singular vectors of A in VT
2918: * (Workspace: need BDSPAC)
2919: *
2920: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2921: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2922: $ INFO )
2923: *
2924: END IF
2925: *
2926: ELSE IF( WNTUO ) THEN
2927: *
2928: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2929: * N right singular vectors to be computed in VT and
2930: * M left singular vectors to be overwritten on A
2931: *
2932: IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2933: *
2934: * Sufficient workspace for a fast algorithm
2935: *
2936: IU = 1
2937: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2938: *
2939: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2940: *
2941: LDWRKU = LDA
2942: IR = IU + LDWRKU*M
2943: LDWRKR = LDA
2944: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2945: *
2946: * WORK(IU) is LDA by M and WORK(IR) is M by M
2947: *
2948: LDWRKU = LDA
2949: IR = IU + LDWRKU*M
2950: LDWRKR = M
2951: ELSE
2952: *
2953: * WORK(IU) is M by M and WORK(IR) is M by M
2954: *
2955: LDWRKU = M
2956: IR = IU + LDWRKU*M
2957: LDWRKR = M
2958: END IF
2959: ITAU = IR + LDWRKR*M
2960: IWORK = ITAU + M
2961: *
2962: * Compute A=L*Q, copying result to VT
2963: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2964: *
2965: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2966: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2967: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2968: *
2969: * Generate Q in VT
2970: * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2971: *
2972: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2973: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2974: *
2975: * Copy L to WORK(IU), zeroing out above it
2976: *
2977: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2978: $ LDWRKU )
2979: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2980: $ WORK( IU+LDWRKU ), LDWRKU )
2981: IE = ITAU
2982: ITAUQ = IE + M
2983: ITAUP = ITAUQ + M
2984: IWORK = ITAUP + M
2985: *
2986: * Bidiagonalize L in WORK(IU), copying result to
2987: * WORK(IR)
2988: * (Workspace: need 2*M*M+4*M,
2989: * prefer 2*M*M+3*M+2*M*NB)
2990: *
2991: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2992: $ WORK( IE ), WORK( ITAUQ ),
2993: $ WORK( ITAUP ), WORK( IWORK ),
2994: $ LWORK-IWORK+1, IERR )
2995: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2996: $ WORK( IR ), LDWRKR )
2997: *
2998: * Generate right bidiagonalizing vectors in WORK(IU)
2999: * (Workspace: need 2*M*M+4*M-1,
3000: * prefer 2*M*M+3*M+(M-1)*NB)
3001: *
3002: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3003: $ WORK( ITAUP ), WORK( IWORK ),
3004: $ LWORK-IWORK+1, IERR )
3005: *
3006: * Generate left bidiagonalizing vectors in WORK(IR)
3007: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3008: *
3009: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3010: $ WORK( ITAUQ ), WORK( IWORK ),
3011: $ LWORK-IWORK+1, IERR )
3012: IWORK = IE + M
3013: *
3014: * Perform bidiagonal QR iteration, computing left
3015: * singular vectors of L in WORK(IR) and computing
3016: * right singular vectors of L in WORK(IU)
3017: * (Workspace: need 2*M*M+BDSPAC)
3018: *
3019: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3020: $ WORK( IU ), LDWRKU, WORK( IR ),
3021: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3022: *
3023: * Multiply right singular vectors of L in WORK(IU) by
3024: * Q in VT, storing result in A
3025: * (Workspace: need M*M)
3026: *
3027: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3028: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3029: *
3030: * Copy right singular vectors of A from A to VT
3031: *
3032: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3033: *
3034: * Copy left singular vectors of A from WORK(IR) to A
3035: *
3036: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3037: $ LDA )
3038: *
3039: ELSE
3040: *
3041: * Insufficient workspace for a fast algorithm
3042: *
3043: ITAU = 1
3044: IWORK = ITAU + M
3045: *
3046: * Compute A=L*Q, copying result to VT
3047: * (Workspace: need 2*M, prefer M+M*NB)
3048: *
3049: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3050: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3051: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3052: *
3053: * Generate Q in VT
3054: * (Workspace: need M+N, prefer M+N*NB)
3055: *
3056: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3057: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3058: IE = ITAU
3059: ITAUQ = IE + M
3060: ITAUP = ITAUQ + M
3061: IWORK = ITAUP + M
3062: *
3063: * Zero out above L in A
3064: *
3065: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3066: $ LDA )
3067: *
3068: * Bidiagonalize L in A
3069: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3070: *
3071: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3072: $ WORK( ITAUQ ), WORK( ITAUP ),
3073: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3074: *
3075: * Multiply right bidiagonalizing vectors in A by Q
3076: * in VT
3077: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3078: *
3079: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3080: $ WORK( ITAUP ), VT, LDVT,
3081: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3082: *
3083: * Generate left bidiagonalizing vectors in A
3084: * (Workspace: need 4*M, prefer 3*M+M*NB)
3085: *
3086: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3087: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3088: IWORK = IE + M
3089: *
3090: * Perform bidiagonal QR iteration, computing left
3091: * singular vectors of A in A and computing right
3092: * singular vectors of A in VT
3093: * (Workspace: need BDSPAC)
3094: *
3095: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3096: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3097: $ INFO )
3098: *
3099: END IF
3100: *
3101: ELSE IF( WNTUAS ) THEN
3102: *
3103: * Path 9t(N much larger than M, JOBU='S' or 'A',
3104: * JOBVT='A')
3105: * N right singular vectors to be computed in VT and
3106: * M left singular vectors to be computed in U
3107: *
3108: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3109: *
3110: * Sufficient workspace for a fast algorithm
3111: *
3112: IU = 1
3113: IF( LWORK.GE.WRKBL+LDA*M ) THEN
3114: *
3115: * WORK(IU) is LDA by M
3116: *
3117: LDWRKU = LDA
3118: ELSE
3119: *
3120: * WORK(IU) is M by M
3121: *
3122: LDWRKU = M
3123: END IF
3124: ITAU = IU + LDWRKU*M
3125: IWORK = ITAU + M
3126: *
3127: * Compute A=L*Q, copying result to VT
3128: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3129: *
3130: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3131: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3132: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3133: *
3134: * Generate Q in VT
3135: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3136: *
3137: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3138: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3139: *
3140: * Copy L to WORK(IU), zeroing out above it
3141: *
3142: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3143: $ LDWRKU )
3144: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3145: $ WORK( IU+LDWRKU ), LDWRKU )
3146: IE = ITAU
3147: ITAUQ = IE + M
3148: ITAUP = ITAUQ + M
3149: IWORK = ITAUP + M
3150: *
3151: * Bidiagonalize L in WORK(IU), copying result to U
3152: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3153: *
3154: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3155: $ WORK( IE ), WORK( ITAUQ ),
3156: $ WORK( ITAUP ), WORK( IWORK ),
3157: $ LWORK-IWORK+1, IERR )
3158: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3159: $ LDU )
3160: *
3161: * Generate right bidiagonalizing vectors in WORK(IU)
3162: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3163: *
3164: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3165: $ WORK( ITAUP ), WORK( IWORK ),
3166: $ LWORK-IWORK+1, IERR )
3167: *
3168: * Generate left bidiagonalizing vectors in U
3169: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3170: *
3171: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3172: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3173: IWORK = IE + M
3174: *
3175: * Perform bidiagonal QR iteration, computing left
3176: * singular vectors of L in U and computing right
3177: * singular vectors of L in WORK(IU)
3178: * (Workspace: need M*M+BDSPAC)
3179: *
3180: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3181: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3182: $ WORK( IWORK ), INFO )
3183: *
3184: * Multiply right singular vectors of L in WORK(IU) by
3185: * Q in VT, storing result in A
3186: * (Workspace: need M*M)
3187: *
3188: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3189: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3190: *
3191: * Copy right singular vectors of A from A to VT
3192: *
3193: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3194: *
3195: ELSE
3196: *
3197: * Insufficient workspace for a fast algorithm
3198: *
3199: ITAU = 1
3200: IWORK = ITAU + M
3201: *
3202: * Compute A=L*Q, copying result to VT
3203: * (Workspace: need 2*M, prefer M+M*NB)
3204: *
3205: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3206: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3207: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3208: *
3209: * Generate Q in VT
3210: * (Workspace: need M+N, prefer M+N*NB)
3211: *
3212: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3213: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3214: *
3215: * Copy L to U, zeroing out above it
3216: *
3217: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3218: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3219: $ LDU )
3220: IE = ITAU
3221: ITAUQ = IE + M
3222: ITAUP = ITAUQ + M
3223: IWORK = ITAUP + M
3224: *
3225: * Bidiagonalize L in U
3226: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3227: *
3228: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3229: $ WORK( ITAUQ ), WORK( ITAUP ),
3230: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3231: *
3232: * Multiply right bidiagonalizing vectors in U by Q
3233: * in VT
3234: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3235: *
3236: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3237: $ WORK( ITAUP ), VT, LDVT,
3238: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3239: *
3240: * Generate left bidiagonalizing vectors in U
3241: * (Workspace: need 4*M, prefer 3*M+M*NB)
3242: *
3243: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3244: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3245: IWORK = IE + M
3246: *
3247: * Perform bidiagonal QR iteration, computing left
3248: * singular vectors of A in U and computing right
3249: * singular vectors of A in VT
3250: * (Workspace: need BDSPAC)
3251: *
3252: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3253: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3254: $ INFO )
3255: *
3256: END IF
3257: *
3258: END IF
3259: *
3260: END IF
3261: *
3262: ELSE
3263: *
3264: * N .LT. MNTHR
3265: *
3266: * Path 10t(N greater than M, but not much larger)
3267: * Reduce to bidiagonal form without LQ decomposition
3268: *
3269: IE = 1
3270: ITAUQ = IE + M
3271: ITAUP = ITAUQ + M
3272: IWORK = ITAUP + M
3273: *
3274: * Bidiagonalize A
3275: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3276: *
3277: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3278: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3279: $ IERR )
3280: IF( WNTUAS ) THEN
3281: *
3282: * If left singular vectors desired in U, copy result to U
3283: * and generate left bidiagonalizing vectors in U
3284: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3285: *
3286: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3287: CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3288: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3289: END IF
3290: IF( WNTVAS ) THEN
3291: *
3292: * If right singular vectors desired in VT, copy result to
3293: * VT and generate right bidiagonalizing vectors in VT
3294: * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3295: *
3296: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3297: IF( WNTVA )
3298: $ NRVT = N
3299: IF( WNTVS )
3300: $ NRVT = M
3301: CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3302: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3303: END IF
3304: IF( WNTUO ) THEN
3305: *
3306: * If left singular vectors desired in A, generate left
3307: * bidiagonalizing vectors in A
3308: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3309: *
3310: CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3311: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3312: END IF
3313: IF( WNTVO ) THEN
3314: *
3315: * If right singular vectors desired in A, generate right
3316: * bidiagonalizing vectors in A
3317: * (Workspace: need 4*M, prefer 3*M+M*NB)
3318: *
3319: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3320: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3321: END IF
3322: IWORK = IE + M
3323: IF( WNTUAS .OR. WNTUO )
3324: $ NRU = M
3325: IF( WNTUN )
3326: $ NRU = 0
3327: IF( WNTVAS .OR. WNTVO )
3328: $ NCVT = N
3329: IF( WNTVN )
3330: $ NCVT = 0
3331: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3332: *
3333: * Perform bidiagonal QR iteration, if desired, computing
3334: * left singular vectors in U and computing right singular
3335: * vectors in VT
3336: * (Workspace: need BDSPAC)
3337: *
3338: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3339: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3340: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3341: *
3342: * Perform bidiagonal QR iteration, if desired, computing
3343: * left singular vectors in U and computing right singular
3344: * vectors in A
3345: * (Workspace: need BDSPAC)
3346: *
3347: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3348: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
3349: ELSE
3350: *
3351: * Perform bidiagonal QR iteration, if desired, computing
3352: * left singular vectors in A and computing right singular
3353: * vectors in VT
3354: * (Workspace: need BDSPAC)
3355: *
3356: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3357: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3358: END IF
3359: *
3360: END IF
3361: *
3362: END IF
3363: *
3364: * If DBDSQR failed to converge, copy unconverged superdiagonals
3365: * to WORK( 2:MINMN )
3366: *
3367: IF( INFO.NE.0 ) THEN
3368: IF( IE.GT.2 ) THEN
3369: DO 50 I = 1, MINMN - 1
3370: WORK( I+1 ) = WORK( I+IE-1 )
3371: 50 CONTINUE
3372: END IF
3373: IF( IE.LT.2 ) THEN
3374: DO 60 I = MINMN - 1, 1, -1
3375: WORK( I+1 ) = WORK( I+IE-1 )
3376: 60 CONTINUE
3377: END IF
3378: END IF
3379: *
3380: * Undo scaling if necessary
3381: *
3382: IF( ISCL.EQ.1 ) THEN
3383: IF( ANRM.GT.BIGNUM )
3384: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3385: $ IERR )
3386: IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3387: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3388: $ MINMN, IERR )
3389: IF( ANRM.LT.SMLNUM )
3390: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3391: $ IERR )
3392: IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3393: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3394: $ MINMN, IERR )
3395: END IF
3396: *
3397: * Return optimal workspace in WORK(1)
3398: *
3399: WORK( 1 ) = MAXWRK
3400: *
3401: RETURN
3402: *
3403: * End of DGESVD
3404: *
3405: END
CVSweb interface <joel.bertrand@systella.fr>