File:
[local] /
rpl /
lapack /
lapack /
dgesvd.f
Revision
1.14:
download - view:
text,
annotated -
select for diffs -
revision graph
Mon Jan 27 09:28:17 2014 UTC (10 years, 7 months ago) by
bertrand
Branches:
MAIN
CVS tags:
rpl-4_1_24,
rpl-4_1_23,
rpl-4_1_22,
rpl-4_1_21,
rpl-4_1_20,
rpl-4_1_19,
rpl-4_1_18,
rpl-4_1_17,
HEAD
Cohérence.
1: *> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DGESVD + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22: * WORK, LWORK, INFO )
23: *
24: * .. Scalar Arguments ..
25: * CHARACTER JOBU, JOBVT
26: * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27: * ..
28: * .. Array Arguments ..
29: * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
30: * $ VT( LDVT, * ), WORK( * )
31: * ..
32: *
33: *
34: *> \par Purpose:
35: * =============
36: *>
37: *> \verbatim
38: *>
39: *> DGESVD computes the singular value decomposition (SVD) of a real
40: *> M-by-N matrix A, optionally computing the left and/or right singular
41: *> vectors. The SVD is written
42: *>
43: *> A = U * SIGMA * transpose(V)
44: *>
45: *> where SIGMA is an M-by-N matrix which is zero except for its
46: *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47: *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48: *> are the singular values of A; they are real and non-negative, and
49: *> are returned in descending order. The first min(m,n) columns of
50: *> U and V are the left and right singular vectors of A.
51: *>
52: *> Note that the routine returns V**T, not V.
53: *> \endverbatim
54: *
55: * Arguments:
56: * ==========
57: *
58: *> \param[in] JOBU
59: *> \verbatim
60: *> JOBU is CHARACTER*1
61: *> Specifies options for computing all or part of the matrix U:
62: *> = 'A': all M columns of U are returned in array U:
63: *> = 'S': the first min(m,n) columns of U (the left singular
64: *> vectors) are returned in the array U;
65: *> = 'O': the first min(m,n) columns of U (the left singular
66: *> vectors) are overwritten on the array A;
67: *> = 'N': no columns of U (no left singular vectors) are
68: *> computed.
69: *> \endverbatim
70: *>
71: *> \param[in] JOBVT
72: *> \verbatim
73: *> JOBVT is CHARACTER*1
74: *> Specifies options for computing all or part of the matrix
75: *> V**T:
76: *> = 'A': all N rows of V**T are returned in the array VT;
77: *> = 'S': the first min(m,n) rows of V**T (the right singular
78: *> vectors) are returned in the array VT;
79: *> = 'O': the first min(m,n) rows of V**T (the right singular
80: *> vectors) are overwritten on the array A;
81: *> = 'N': no rows of V**T (no right singular vectors) are
82: *> computed.
83: *>
84: *> JOBVT and JOBU cannot both be 'O'.
85: *> \endverbatim
86: *>
87: *> \param[in] M
88: *> \verbatim
89: *> M is INTEGER
90: *> The number of rows of the input matrix A. M >= 0.
91: *> \endverbatim
92: *>
93: *> \param[in] N
94: *> \verbatim
95: *> N is INTEGER
96: *> The number of columns of the input matrix A. N >= 0.
97: *> \endverbatim
98: *>
99: *> \param[in,out] A
100: *> \verbatim
101: *> A is DOUBLE PRECISION array, dimension (LDA,N)
102: *> On entry, the M-by-N matrix A.
103: *> On exit,
104: *> if JOBU = 'O', A is overwritten with the first min(m,n)
105: *> columns of U (the left singular vectors,
106: *> stored columnwise);
107: *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108: *> rows of V**T (the right singular vectors,
109: *> stored rowwise);
110: *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111: *> are destroyed.
112: *> \endverbatim
113: *>
114: *> \param[in] LDA
115: *> \verbatim
116: *> LDA is INTEGER
117: *> The leading dimension of the array A. LDA >= max(1,M).
118: *> \endverbatim
119: *>
120: *> \param[out] S
121: *> \verbatim
122: *> S is DOUBLE PRECISION array, dimension (min(M,N))
123: *> The singular values of A, sorted so that S(i) >= S(i+1).
124: *> \endverbatim
125: *>
126: *> \param[out] U
127: *> \verbatim
128: *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129: *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130: *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131: *> if JOBU = 'S', U contains the first min(m,n) columns of U
132: *> (the left singular vectors, stored columnwise);
133: *> if JOBU = 'N' or 'O', U is not referenced.
134: *> \endverbatim
135: *>
136: *> \param[in] LDU
137: *> \verbatim
138: *> LDU is INTEGER
139: *> The leading dimension of the array U. LDU >= 1; if
140: *> JOBU = 'S' or 'A', LDU >= M.
141: *> \endverbatim
142: *>
143: *> \param[out] VT
144: *> \verbatim
145: *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
146: *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147: *> V**T;
148: *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149: *> V**T (the right singular vectors, stored rowwise);
150: *> if JOBVT = 'N' or 'O', VT is not referenced.
151: *> \endverbatim
152: *>
153: *> \param[in] LDVT
154: *> \verbatim
155: *> LDVT is INTEGER
156: *> The leading dimension of the array VT. LDVT >= 1; if
157: *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158: *> \endverbatim
159: *>
160: *> \param[out] WORK
161: *> \verbatim
162: *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164: *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165: *> superdiagonal elements of an upper bidiagonal matrix B
166: *> whose diagonal is in S (not necessarily sorted). B
167: *> satisfies A = U * B * VT, so it has the same singular values
168: *> as A, and singular vectors related by U and VT.
169: *> \endverbatim
170: *>
171: *> \param[in] LWORK
172: *> \verbatim
173: *> LWORK is INTEGER
174: *> The dimension of the array WORK.
175: *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176: *> - PATH 1 (M much larger than N, JOBU='N')
177: *> - PATH 1t (N much larger than M, JOBVT='N')
178: *> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179: *> For good performance, LWORK should generally be larger.
180: *>
181: *> If LWORK = -1, then a workspace query is assumed; the routine
182: *> only calculates the optimal size of the WORK array, returns
183: *> this value as the first entry of the WORK array, and no error
184: *> message related to LWORK is issued by XERBLA.
185: *> \endverbatim
186: *>
187: *> \param[out] INFO
188: *> \verbatim
189: *> INFO is INTEGER
190: *> = 0: successful exit.
191: *> < 0: if INFO = -i, the i-th argument had an illegal value.
192: *> > 0: if DBDSQR did not converge, INFO specifies how many
193: *> superdiagonals of an intermediate bidiagonal form B
194: *> did not converge to zero. See the description of WORK
195: *> above for details.
196: *> \endverbatim
197: *
198: * Authors:
199: * ========
200: *
201: *> \author Univ. of Tennessee
202: *> \author Univ. of California Berkeley
203: *> \author Univ. of Colorado Denver
204: *> \author NAG Ltd.
205: *
206: *> \date April 2012
207: *
208: *> \ingroup doubleGEsing
209: *
210: * =====================================================================
211: SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
212: $ VT, LDVT, WORK, LWORK, INFO )
213: *
214: * -- LAPACK driver routine (version 3.4.1) --
215: * -- LAPACK is a software package provided by Univ. of Tennessee, --
216: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217: * April 2012
218: *
219: * .. Scalar Arguments ..
220: CHARACTER JOBU, JOBVT
221: INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222: * ..
223: * .. Array Arguments ..
224: DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
225: $ VT( LDVT, * ), WORK( * )
226: * ..
227: *
228: * =====================================================================
229: *
230: * .. Parameters ..
231: DOUBLE PRECISION ZERO, ONE
232: PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
233: * ..
234: * .. Local Scalars ..
235: LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236: $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
237: INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238: $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
239: $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
240: $ NRVT, WRKBL
241: INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
242: $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
243: $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
244: DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
245: * ..
246: * .. Local Arrays ..
247: DOUBLE PRECISION DUM( 1 )
248: * ..
249: * .. External Subroutines ..
250: EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
251: $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
252: $ XERBLA
253: * ..
254: * .. External Functions ..
255: LOGICAL LSAME
256: INTEGER ILAENV
257: DOUBLE PRECISION DLAMCH, DLANGE
258: EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
259: * ..
260: * .. Intrinsic Functions ..
261: INTRINSIC MAX, MIN, SQRT
262: * ..
263: * .. Executable Statements ..
264: *
265: * Test the input arguments
266: *
267: INFO = 0
268: MINMN = MIN( M, N )
269: WNTUA = LSAME( JOBU, 'A' )
270: WNTUS = LSAME( JOBU, 'S' )
271: WNTUAS = WNTUA .OR. WNTUS
272: WNTUO = LSAME( JOBU, 'O' )
273: WNTUN = LSAME( JOBU, 'N' )
274: WNTVA = LSAME( JOBVT, 'A' )
275: WNTVS = LSAME( JOBVT, 'S' )
276: WNTVAS = WNTVA .OR. WNTVS
277: WNTVO = LSAME( JOBVT, 'O' )
278: WNTVN = LSAME( JOBVT, 'N' )
279: LQUERY = ( LWORK.EQ.-1 )
280: *
281: IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
282: INFO = -1
283: ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
284: $ ( WNTVO .AND. WNTUO ) ) THEN
285: INFO = -2
286: ELSE IF( M.LT.0 ) THEN
287: INFO = -3
288: ELSE IF( N.LT.0 ) THEN
289: INFO = -4
290: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
291: INFO = -6
292: ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
293: INFO = -9
294: ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
295: $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
296: INFO = -11
297: END IF
298: *
299: * Compute workspace
300: * (Note: Comments in the code beginning "Workspace:" describe the
301: * minimal amount of workspace needed at that point in the code,
302: * as well as the preferred amount for good performance.
303: * NB refers to the optimal block size for the immediately
304: * following subroutine, as returned by ILAENV.)
305: *
306: IF( INFO.EQ.0 ) THEN
307: MINWRK = 1
308: MAXWRK = 1
309: IF( M.GE.N .AND. MINMN.GT.0 ) THEN
310: *
311: * Compute space needed for DBDSQR
312: *
313: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
314: BDSPAC = 5*N
315: * Compute space needed for DGEQRF
316: CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
317: LWORK_DGEQRF=DUM(1)
318: * Compute space needed for DORGQR
319: CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
320: LWORK_DORGQR_N=DUM(1)
321: CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
322: LWORK_DORGQR_M=DUM(1)
323: * Compute space needed for DGEBRD
324: CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
325: $ DUM(1), DUM(1), -1, IERR )
326: LWORK_DGEBRD=DUM(1)
327: * Compute space needed for DORGBR P
328: CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
329: $ DUM(1), -1, IERR )
330: LWORK_DORGBR_P=DUM(1)
331: * Compute space needed for DORGBR Q
332: CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
333: $ DUM(1), -1, IERR )
334: LWORK_DORGBR_Q=DUM(1)
335: *
336: IF( M.GE.MNTHR ) THEN
337: IF( WNTUN ) THEN
338: *
339: * Path 1 (M much larger than N, JOBU='N')
340: *
341: MAXWRK = N + LWORK_DGEQRF
342: MAXWRK = MAX( MAXWRK, 3*N+LWORK_DGEBRD )
343: IF( WNTVO .OR. WNTVAS )
344: $ MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
345: MAXWRK = MAX( MAXWRK, BDSPAC )
346: MINWRK = MAX( 4*N, BDSPAC )
347: ELSE IF( WNTUO .AND. WNTVN ) THEN
348: *
349: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350: *
351: WRKBL = N + LWORK_DGEQRF
352: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
353: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
354: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
355: WRKBL = MAX( WRKBL, BDSPAC )
356: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
357: MINWRK = MAX( 3*N+M, BDSPAC )
358: ELSE IF( WNTUO .AND. WNTVAS ) THEN
359: *
360: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361: * 'A')
362: *
363: WRKBL = N + LWORK_DGEQRF
364: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
365: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
366: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
367: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
368: WRKBL = MAX( WRKBL, BDSPAC )
369: MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
370: MINWRK = MAX( 3*N+M, BDSPAC )
371: ELSE IF( WNTUS .AND. WNTVN ) THEN
372: *
373: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374: *
375: WRKBL = N + LWORK_DGEQRF
376: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
377: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
378: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
379: WRKBL = MAX( WRKBL, BDSPAC )
380: MAXWRK = N*N + WRKBL
381: MINWRK = MAX( 3*N+M, BDSPAC )
382: ELSE IF( WNTUS .AND. WNTVO ) THEN
383: *
384: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385: *
386: WRKBL = N + LWORK_DGEQRF
387: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
388: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
389: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
390: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
391: WRKBL = MAX( WRKBL, BDSPAC )
392: MAXWRK = 2*N*N + WRKBL
393: MINWRK = MAX( 3*N+M, BDSPAC )
394: ELSE IF( WNTUS .AND. WNTVAS ) THEN
395: *
396: * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397: * 'A')
398: *
399: WRKBL = N + LWORK_DGEQRF
400: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
401: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
402: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
403: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
404: WRKBL = MAX( WRKBL, BDSPAC )
405: MAXWRK = N*N + WRKBL
406: MINWRK = MAX( 3*N+M, BDSPAC )
407: ELSE IF( WNTUA .AND. WNTVN ) THEN
408: *
409: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410: *
411: WRKBL = N + LWORK_DGEQRF
412: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
413: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
414: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
415: WRKBL = MAX( WRKBL, BDSPAC )
416: MAXWRK = N*N + WRKBL
417: MINWRK = MAX( 3*N+M, BDSPAC )
418: ELSE IF( WNTUA .AND. WNTVO ) THEN
419: *
420: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421: *
422: WRKBL = N + LWORK_DGEQRF
423: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
424: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
425: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
426: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
427: WRKBL = MAX( WRKBL, BDSPAC )
428: MAXWRK = 2*N*N + WRKBL
429: MINWRK = MAX( 3*N+M, BDSPAC )
430: ELSE IF( WNTUA .AND. WNTVAS ) THEN
431: *
432: * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433: * 'A')
434: *
435: WRKBL = N + LWORK_DGEQRF
436: WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
437: WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
438: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
439: WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
440: WRKBL = MAX( WRKBL, BDSPAC )
441: MAXWRK = N*N + WRKBL
442: MINWRK = MAX( 3*N+M, BDSPAC )
443: END IF
444: ELSE
445: *
446: * Path 10 (M at least N, but not much larger)
447: *
448: CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
449: $ DUM(1), DUM(1), -1, IERR )
450: LWORK_DGEBRD=DUM(1)
451: MAXWRK = 3*N + LWORK_DGEBRD
452: IF( WNTUS .OR. WNTUO ) THEN
453: CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
454: $ DUM(1), -1, IERR )
455: LWORK_DORGBR_Q=DUM(1)
456: MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
457: END IF
458: IF( WNTUA ) THEN
459: CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
460: $ DUM(1), -1, IERR )
461: LWORK_DORGBR_Q=DUM(1)
462: MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
463: END IF
464: IF( .NOT.WNTVN ) THEN
465: MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
466: END IF
467: MAXWRK = MAX( MAXWRK, BDSPAC )
468: MINWRK = MAX( 3*N+M, BDSPAC )
469: END IF
470: ELSE IF( MINMN.GT.0 ) THEN
471: *
472: * Compute space needed for DBDSQR
473: *
474: MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
475: BDSPAC = 5*M
476: * Compute space needed for DGELQF
477: CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
478: LWORK_DGELQF=DUM(1)
479: * Compute space needed for DORGLQ
480: CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
481: LWORK_DORGLQ_N=DUM(1)
482: CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
483: LWORK_DORGLQ_M=DUM(1)
484: * Compute space needed for DGEBRD
485: CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
486: $ DUM(1), DUM(1), -1, IERR )
487: LWORK_DGEBRD=DUM(1)
488: * Compute space needed for DORGBR P
489: CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
490: $ DUM(1), -1, IERR )
491: LWORK_DORGBR_P=DUM(1)
492: * Compute space needed for DORGBR Q
493: CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
494: $ DUM(1), -1, IERR )
495: LWORK_DORGBR_Q=DUM(1)
496: IF( N.GE.MNTHR ) THEN
497: IF( WNTVN ) THEN
498: *
499: * Path 1t(N much larger than M, JOBVT='N')
500: *
501: MAXWRK = M + LWORK_DGELQF
502: MAXWRK = MAX( MAXWRK, 3*M+LWORK_DGEBRD )
503: IF( WNTUO .OR. WNTUAS )
504: $ MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
505: MAXWRK = MAX( MAXWRK, BDSPAC )
506: MINWRK = MAX( 4*M, BDSPAC )
507: ELSE IF( WNTVO .AND. WNTUN ) THEN
508: *
509: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510: *
511: WRKBL = M + LWORK_DGELQF
512: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
513: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
514: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
515: WRKBL = MAX( WRKBL, BDSPAC )
516: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
517: MINWRK = MAX( 3*M+N, BDSPAC )
518: ELSE IF( WNTVO .AND. WNTUAS ) THEN
519: *
520: * Path 3t(N much larger than M, JOBU='S' or 'A',
521: * JOBVT='O')
522: *
523: WRKBL = M + LWORK_DGELQF
524: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
525: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
526: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
527: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
528: WRKBL = MAX( WRKBL, BDSPAC )
529: MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
530: MINWRK = MAX( 3*M+N, BDSPAC )
531: ELSE IF( WNTVS .AND. WNTUN ) THEN
532: *
533: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534: *
535: WRKBL = M + LWORK_DGELQF
536: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
537: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
538: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
539: WRKBL = MAX( WRKBL, BDSPAC )
540: MAXWRK = M*M + WRKBL
541: MINWRK = MAX( 3*M+N, BDSPAC )
542: ELSE IF( WNTVS .AND. WNTUO ) THEN
543: *
544: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545: *
546: WRKBL = M + LWORK_DGELQF
547: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
548: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
549: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
550: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
551: WRKBL = MAX( WRKBL, BDSPAC )
552: MAXWRK = 2*M*M + WRKBL
553: MINWRK = MAX( 3*M+N, BDSPAC )
554: ELSE IF( WNTVS .AND. WNTUAS ) THEN
555: *
556: * Path 6t(N much larger than M, JOBU='S' or 'A',
557: * JOBVT='S')
558: *
559: WRKBL = M + LWORK_DGELQF
560: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
561: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
562: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
563: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
564: WRKBL = MAX( WRKBL, BDSPAC )
565: MAXWRK = M*M + WRKBL
566: MINWRK = MAX( 3*M+N, BDSPAC )
567: ELSE IF( WNTVA .AND. WNTUN ) THEN
568: *
569: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570: *
571: WRKBL = M + LWORK_DGELQF
572: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
573: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
574: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
575: WRKBL = MAX( WRKBL, BDSPAC )
576: MAXWRK = M*M + WRKBL
577: MINWRK = MAX( 3*M+N, BDSPAC )
578: ELSE IF( WNTVA .AND. WNTUO ) THEN
579: *
580: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581: *
582: WRKBL = M + LWORK_DGELQF
583: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
584: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
585: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
586: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
587: WRKBL = MAX( WRKBL, BDSPAC )
588: MAXWRK = 2*M*M + WRKBL
589: MINWRK = MAX( 3*M+N, BDSPAC )
590: ELSE IF( WNTVA .AND. WNTUAS ) THEN
591: *
592: * Path 9t(N much larger than M, JOBU='S' or 'A',
593: * JOBVT='A')
594: *
595: WRKBL = M + LWORK_DGELQF
596: WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
597: WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
598: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
599: WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
600: WRKBL = MAX( WRKBL, BDSPAC )
601: MAXWRK = M*M + WRKBL
602: MINWRK = MAX( 3*M+N, BDSPAC )
603: END IF
604: ELSE
605: *
606: * Path 10t(N greater than M, but not much larger)
607: *
608: CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
609: $ DUM(1), DUM(1), -1, IERR )
610: LWORK_DGEBRD=DUM(1)
611: MAXWRK = 3*M + LWORK_DGEBRD
612: IF( WNTVS .OR. WNTVO ) THEN
613: * Compute space needed for DORGBR P
614: CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
615: $ DUM(1), -1, IERR )
616: LWORK_DORGBR_P=DUM(1)
617: MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
618: END IF
619: IF( WNTVA ) THEN
620: CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
621: $ DUM(1), -1, IERR )
622: LWORK_DORGBR_P=DUM(1)
623: MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
624: END IF
625: IF( .NOT.WNTUN ) THEN
626: MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
627: END IF
628: MAXWRK = MAX( MAXWRK, BDSPAC )
629: MINWRK = MAX( 3*M+N, BDSPAC )
630: END IF
631: END IF
632: MAXWRK = MAX( MAXWRK, MINWRK )
633: WORK( 1 ) = MAXWRK
634: *
635: IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
636: INFO = -13
637: END IF
638: END IF
639: *
640: IF( INFO.NE.0 ) THEN
641: CALL XERBLA( 'DGESVD', -INFO )
642: RETURN
643: ELSE IF( LQUERY ) THEN
644: RETURN
645: END IF
646: *
647: * Quick return if possible
648: *
649: IF( M.EQ.0 .OR. N.EQ.0 ) THEN
650: RETURN
651: END IF
652: *
653: * Get machine constants
654: *
655: EPS = DLAMCH( 'P' )
656: SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
657: BIGNUM = ONE / SMLNUM
658: *
659: * Scale A if max element outside range [SMLNUM,BIGNUM]
660: *
661: ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
662: ISCL = 0
663: IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
664: ISCL = 1
665: CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
666: ELSE IF( ANRM.GT.BIGNUM ) THEN
667: ISCL = 1
668: CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
669: END IF
670: *
671: IF( M.GE.N ) THEN
672: *
673: * A has at least as many rows as columns. If A has sufficiently
674: * more rows than columns, first reduce using the QR
675: * decomposition (if sufficient workspace available)
676: *
677: IF( M.GE.MNTHR ) THEN
678: *
679: IF( WNTUN ) THEN
680: *
681: * Path 1 (M much larger than N, JOBU='N')
682: * No left singular vectors to be computed
683: *
684: ITAU = 1
685: IWORK = ITAU + N
686: *
687: * Compute A=Q*R
688: * (Workspace: need 2*N, prefer N+N*NB)
689: *
690: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
691: $ LWORK-IWORK+1, IERR )
692: *
693: * Zero out below R
694: *
695: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
696: IE = 1
697: ITAUQ = IE + N
698: ITAUP = ITAUQ + N
699: IWORK = ITAUP + N
700: *
701: * Bidiagonalize R in A
702: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
703: *
704: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
705: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
706: $ IERR )
707: NCVT = 0
708: IF( WNTVO .OR. WNTVAS ) THEN
709: *
710: * If right singular vectors desired, generate P'.
711: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
712: *
713: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
714: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
715: NCVT = N
716: END IF
717: IWORK = IE + N
718: *
719: * Perform bidiagonal QR iteration, computing right
720: * singular vectors of A in A if desired
721: * (Workspace: need BDSPAC)
722: *
723: CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
724: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
725: *
726: * If right singular vectors desired in VT, copy them there
727: *
728: IF( WNTVAS )
729: $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
730: *
731: ELSE IF( WNTUO .AND. WNTVN ) THEN
732: *
733: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734: * N left singular vectors to be overwritten on A and
735: * no right singular vectors to be computed
736: *
737: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
738: *
739: * Sufficient workspace for a fast algorithm
740: *
741: IR = 1
742: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
743: *
744: * WORK(IU) is LDA by N, WORK(IR) is LDA by N
745: *
746: LDWRKU = LDA
747: LDWRKR = LDA
748: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
749: *
750: * WORK(IU) is LDA by N, WORK(IR) is N by N
751: *
752: LDWRKU = LDA
753: LDWRKR = N
754: ELSE
755: *
756: * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757: *
758: LDWRKU = ( LWORK-N*N-N ) / N
759: LDWRKR = N
760: END IF
761: ITAU = IR + LDWRKR*N
762: IWORK = ITAU + N
763: *
764: * Compute A=Q*R
765: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
766: *
767: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
768: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
769: *
770: * Copy R to WORK(IR) and zero out below it
771: *
772: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
773: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
774: $ LDWRKR )
775: *
776: * Generate Q in A
777: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
778: *
779: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
780: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
781: IE = ITAU
782: ITAUQ = IE + N
783: ITAUP = ITAUQ + N
784: IWORK = ITAUP + N
785: *
786: * Bidiagonalize R in WORK(IR)
787: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
788: *
789: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
790: $ WORK( ITAUQ ), WORK( ITAUP ),
791: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
792: *
793: * Generate left vectors bidiagonalizing R
794: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
795: *
796: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
797: $ WORK( ITAUQ ), WORK( IWORK ),
798: $ LWORK-IWORK+1, IERR )
799: IWORK = IE + N
800: *
801: * Perform bidiagonal QR iteration, computing left
802: * singular vectors of R in WORK(IR)
803: * (Workspace: need N*N+BDSPAC)
804: *
805: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
806: $ WORK( IR ), LDWRKR, DUM, 1,
807: $ WORK( IWORK ), INFO )
808: IU = IE + N
809: *
810: * Multiply Q in A by left singular vectors of R in
811: * WORK(IR), storing result in WORK(IU) and copying to A
812: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
813: *
814: DO 10 I = 1, M, LDWRKU
815: CHUNK = MIN( M-I+1, LDWRKU )
816: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
817: $ LDA, WORK( IR ), LDWRKR, ZERO,
818: $ WORK( IU ), LDWRKU )
819: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
820: $ A( I, 1 ), LDA )
821: 10 CONTINUE
822: *
823: ELSE
824: *
825: * Insufficient workspace for a fast algorithm
826: *
827: IE = 1
828: ITAUQ = IE + N
829: ITAUP = ITAUQ + N
830: IWORK = ITAUP + N
831: *
832: * Bidiagonalize A
833: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
834: *
835: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
836: $ WORK( ITAUQ ), WORK( ITAUP ),
837: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
838: *
839: * Generate left vectors bidiagonalizing A
840: * (Workspace: need 4*N, prefer 3*N+N*NB)
841: *
842: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
843: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
844: IWORK = IE + N
845: *
846: * Perform bidiagonal QR iteration, computing left
847: * singular vectors of A in A
848: * (Workspace: need BDSPAC)
849: *
850: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
851: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
852: *
853: END IF
854: *
855: ELSE IF( WNTUO .AND. WNTVAS ) THEN
856: *
857: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858: * N left singular vectors to be overwritten on A and
859: * N right singular vectors to be computed in VT
860: *
861: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
862: *
863: * Sufficient workspace for a fast algorithm
864: *
865: IR = 1
866: IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
867: *
868: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
869: *
870: LDWRKU = LDA
871: LDWRKR = LDA
872: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
873: *
874: * WORK(IU) is LDA by N and WORK(IR) is N by N
875: *
876: LDWRKU = LDA
877: LDWRKR = N
878: ELSE
879: *
880: * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881: *
882: LDWRKU = ( LWORK-N*N-N ) / N
883: LDWRKR = N
884: END IF
885: ITAU = IR + LDWRKR*N
886: IWORK = ITAU + N
887: *
888: * Compute A=Q*R
889: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
890: *
891: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
892: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
893: *
894: * Copy R to VT, zeroing out below it
895: *
896: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
897: IF( N.GT.1 )
898: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
899: $ VT( 2, 1 ), LDVT )
900: *
901: * Generate Q in A
902: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
903: *
904: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
905: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
906: IE = ITAU
907: ITAUQ = IE + N
908: ITAUP = ITAUQ + N
909: IWORK = ITAUP + N
910: *
911: * Bidiagonalize R in VT, copying result to WORK(IR)
912: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
913: *
914: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
915: $ WORK( ITAUQ ), WORK( ITAUP ),
916: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
917: CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
918: *
919: * Generate left vectors bidiagonalizing R in WORK(IR)
920: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
921: *
922: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
923: $ WORK( ITAUQ ), WORK( IWORK ),
924: $ LWORK-IWORK+1, IERR )
925: *
926: * Generate right vectors bidiagonalizing R in VT
927: * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
928: *
929: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
930: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
931: IWORK = IE + N
932: *
933: * Perform bidiagonal QR iteration, computing left
934: * singular vectors of R in WORK(IR) and computing right
935: * singular vectors of R in VT
936: * (Workspace: need N*N+BDSPAC)
937: *
938: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
939: $ WORK( IR ), LDWRKR, DUM, 1,
940: $ WORK( IWORK ), INFO )
941: IU = IE + N
942: *
943: * Multiply Q in A by left singular vectors of R in
944: * WORK(IR), storing result in WORK(IU) and copying to A
945: * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
946: *
947: DO 20 I = 1, M, LDWRKU
948: CHUNK = MIN( M-I+1, LDWRKU )
949: CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
950: $ LDA, WORK( IR ), LDWRKR, ZERO,
951: $ WORK( IU ), LDWRKU )
952: CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
953: $ A( I, 1 ), LDA )
954: 20 CONTINUE
955: *
956: ELSE
957: *
958: * Insufficient workspace for a fast algorithm
959: *
960: ITAU = 1
961: IWORK = ITAU + N
962: *
963: * Compute A=Q*R
964: * (Workspace: need 2*N, prefer N+N*NB)
965: *
966: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
967: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
968: *
969: * Copy R to VT, zeroing out below it
970: *
971: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
972: IF( N.GT.1 )
973: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
974: $ VT( 2, 1 ), LDVT )
975: *
976: * Generate Q in A
977: * (Workspace: need 2*N, prefer N+N*NB)
978: *
979: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
980: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
981: IE = ITAU
982: ITAUQ = IE + N
983: ITAUP = ITAUQ + N
984: IWORK = ITAUP + N
985: *
986: * Bidiagonalize R in VT
987: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
988: *
989: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
990: $ WORK( ITAUQ ), WORK( ITAUP ),
991: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
992: *
993: * Multiply Q in A by left vectors bidiagonalizing R
994: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
995: *
996: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
997: $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
998: $ LWORK-IWORK+1, IERR )
999: *
1000: * Generate right vectors bidiagonalizing R in VT
1001: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1002: *
1003: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1004: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1005: IWORK = IE + N
1006: *
1007: * Perform bidiagonal QR iteration, computing left
1008: * singular vectors of A in A and computing right
1009: * singular vectors of A in VT
1010: * (Workspace: need BDSPAC)
1011: *
1012: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
1013: $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
1014: *
1015: END IF
1016: *
1017: ELSE IF( WNTUS ) THEN
1018: *
1019: IF( WNTVN ) THEN
1020: *
1021: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022: * N left singular vectors to be computed in U and
1023: * no right singular vectors to be computed
1024: *
1025: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1026: *
1027: * Sufficient workspace for a fast algorithm
1028: *
1029: IR = 1
1030: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1031: *
1032: * WORK(IR) is LDA by N
1033: *
1034: LDWRKR = LDA
1035: ELSE
1036: *
1037: * WORK(IR) is N by N
1038: *
1039: LDWRKR = N
1040: END IF
1041: ITAU = IR + LDWRKR*N
1042: IWORK = ITAU + N
1043: *
1044: * Compute A=Q*R
1045: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1046: *
1047: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1048: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1049: *
1050: * Copy R to WORK(IR), zeroing out below it
1051: *
1052: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1053: $ LDWRKR )
1054: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1055: $ WORK( IR+1 ), LDWRKR )
1056: *
1057: * Generate Q in A
1058: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1059: *
1060: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1061: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1062: IE = ITAU
1063: ITAUQ = IE + N
1064: ITAUP = ITAUQ + N
1065: IWORK = ITAUP + N
1066: *
1067: * Bidiagonalize R in WORK(IR)
1068: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1069: *
1070: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1071: $ WORK( IE ), WORK( ITAUQ ),
1072: $ WORK( ITAUP ), WORK( IWORK ),
1073: $ LWORK-IWORK+1, IERR )
1074: *
1075: * Generate left vectors bidiagonalizing R in WORK(IR)
1076: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1077: *
1078: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1079: $ WORK( ITAUQ ), WORK( IWORK ),
1080: $ LWORK-IWORK+1, IERR )
1081: IWORK = IE + N
1082: *
1083: * Perform bidiagonal QR iteration, computing left
1084: * singular vectors of R in WORK(IR)
1085: * (Workspace: need N*N+BDSPAC)
1086: *
1087: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1088: $ 1, WORK( IR ), LDWRKR, DUM, 1,
1089: $ WORK( IWORK ), INFO )
1090: *
1091: * Multiply Q in A by left singular vectors of R in
1092: * WORK(IR), storing result in U
1093: * (Workspace: need N*N)
1094: *
1095: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1096: $ WORK( IR ), LDWRKR, ZERO, U, LDU )
1097: *
1098: ELSE
1099: *
1100: * Insufficient workspace for a fast algorithm
1101: *
1102: ITAU = 1
1103: IWORK = ITAU + N
1104: *
1105: * Compute A=Q*R, copying result to U
1106: * (Workspace: need 2*N, prefer N+N*NB)
1107: *
1108: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1109: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1110: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1111: *
1112: * Generate Q in U
1113: * (Workspace: need 2*N, prefer N+N*NB)
1114: *
1115: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1116: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1117: IE = ITAU
1118: ITAUQ = IE + N
1119: ITAUP = ITAUQ + N
1120: IWORK = ITAUP + N
1121: *
1122: * Zero out below R in A
1123: *
1124: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1125: $ LDA )
1126: *
1127: * Bidiagonalize R in A
1128: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1129: *
1130: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1131: $ WORK( ITAUQ ), WORK( ITAUP ),
1132: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1133: *
1134: * Multiply Q in U by left vectors bidiagonalizing R
1135: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1136: *
1137: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1138: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1139: $ LWORK-IWORK+1, IERR )
1140: IWORK = IE + N
1141: *
1142: * Perform bidiagonal QR iteration, computing left
1143: * singular vectors of A in U
1144: * (Workspace: need BDSPAC)
1145: *
1146: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1147: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1148: $ INFO )
1149: *
1150: END IF
1151: *
1152: ELSE IF( WNTVO ) THEN
1153: *
1154: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1155: * N left singular vectors to be computed in U and
1156: * N right singular vectors to be overwritten on A
1157: *
1158: IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1159: *
1160: * Sufficient workspace for a fast algorithm
1161: *
1162: IU = 1
1163: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1164: *
1165: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1166: *
1167: LDWRKU = LDA
1168: IR = IU + LDWRKU*N
1169: LDWRKR = LDA
1170: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1171: *
1172: * WORK(IU) is LDA by N and WORK(IR) is N by N
1173: *
1174: LDWRKU = LDA
1175: IR = IU + LDWRKU*N
1176: LDWRKR = N
1177: ELSE
1178: *
1179: * WORK(IU) is N by N and WORK(IR) is N by N
1180: *
1181: LDWRKU = N
1182: IR = IU + LDWRKU*N
1183: LDWRKR = N
1184: END IF
1185: ITAU = IR + LDWRKR*N
1186: IWORK = ITAU + N
1187: *
1188: * Compute A=Q*R
1189: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1190: *
1191: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1192: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1193: *
1194: * Copy R to WORK(IU), zeroing out below it
1195: *
1196: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1197: $ LDWRKU )
1198: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1199: $ WORK( IU+1 ), LDWRKU )
1200: *
1201: * Generate Q in A
1202: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1203: *
1204: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1205: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1206: IE = ITAU
1207: ITAUQ = IE + N
1208: ITAUP = ITAUQ + N
1209: IWORK = ITAUP + N
1210: *
1211: * Bidiagonalize R in WORK(IU), copying result to
1212: * WORK(IR)
1213: * (Workspace: need 2*N*N+4*N,
1214: * prefer 2*N*N+3*N+2*N*NB)
1215: *
1216: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1217: $ WORK( IE ), WORK( ITAUQ ),
1218: $ WORK( ITAUP ), WORK( IWORK ),
1219: $ LWORK-IWORK+1, IERR )
1220: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1221: $ WORK( IR ), LDWRKR )
1222: *
1223: * Generate left bidiagonalizing vectors in WORK(IU)
1224: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1225: *
1226: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1227: $ WORK( ITAUQ ), WORK( IWORK ),
1228: $ LWORK-IWORK+1, IERR )
1229: *
1230: * Generate right bidiagonalizing vectors in WORK(IR)
1231: * (Workspace: need 2*N*N+4*N-1,
1232: * prefer 2*N*N+3*N+(N-1)*NB)
1233: *
1234: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1235: $ WORK( ITAUP ), WORK( IWORK ),
1236: $ LWORK-IWORK+1, IERR )
1237: IWORK = IE + N
1238: *
1239: * Perform bidiagonal QR iteration, computing left
1240: * singular vectors of R in WORK(IU) and computing
1241: * right singular vectors of R in WORK(IR)
1242: * (Workspace: need 2*N*N+BDSPAC)
1243: *
1244: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1245: $ WORK( IR ), LDWRKR, WORK( IU ),
1246: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1247: *
1248: * Multiply Q in A by left singular vectors of R in
1249: * WORK(IU), storing result in U
1250: * (Workspace: need N*N)
1251: *
1252: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1253: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1254: *
1255: * Copy right singular vectors of R to A
1256: * (Workspace: need N*N)
1257: *
1258: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1259: $ LDA )
1260: *
1261: ELSE
1262: *
1263: * Insufficient workspace for a fast algorithm
1264: *
1265: ITAU = 1
1266: IWORK = ITAU + N
1267: *
1268: * Compute A=Q*R, copying result to U
1269: * (Workspace: need 2*N, prefer N+N*NB)
1270: *
1271: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1272: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1273: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1274: *
1275: * Generate Q in U
1276: * (Workspace: need 2*N, prefer N+N*NB)
1277: *
1278: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1279: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1280: IE = ITAU
1281: ITAUQ = IE + N
1282: ITAUP = ITAUQ + N
1283: IWORK = ITAUP + N
1284: *
1285: * Zero out below R in A
1286: *
1287: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1288: $ LDA )
1289: *
1290: * Bidiagonalize R in A
1291: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1292: *
1293: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1294: $ WORK( ITAUQ ), WORK( ITAUP ),
1295: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1296: *
1297: * Multiply Q in U by left vectors bidiagonalizing R
1298: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1299: *
1300: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1301: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1302: $ LWORK-IWORK+1, IERR )
1303: *
1304: * Generate right vectors bidiagonalizing R in A
1305: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1306: *
1307: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1308: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1309: IWORK = IE + N
1310: *
1311: * Perform bidiagonal QR iteration, computing left
1312: * singular vectors of A in U and computing right
1313: * singular vectors of A in A
1314: * (Workspace: need BDSPAC)
1315: *
1316: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1317: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1318: $ INFO )
1319: *
1320: END IF
1321: *
1322: ELSE IF( WNTVAS ) THEN
1323: *
1324: * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1325: * or 'A')
1326: * N left singular vectors to be computed in U and
1327: * N right singular vectors to be computed in VT
1328: *
1329: IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1330: *
1331: * Sufficient workspace for a fast algorithm
1332: *
1333: IU = 1
1334: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1335: *
1336: * WORK(IU) is LDA by N
1337: *
1338: LDWRKU = LDA
1339: ELSE
1340: *
1341: * WORK(IU) is N by N
1342: *
1343: LDWRKU = N
1344: END IF
1345: ITAU = IU + LDWRKU*N
1346: IWORK = ITAU + N
1347: *
1348: * Compute A=Q*R
1349: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1350: *
1351: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1352: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1353: *
1354: * Copy R to WORK(IU), zeroing out below it
1355: *
1356: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1357: $ LDWRKU )
1358: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1359: $ WORK( IU+1 ), LDWRKU )
1360: *
1361: * Generate Q in A
1362: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1363: *
1364: CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1365: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1366: IE = ITAU
1367: ITAUQ = IE + N
1368: ITAUP = ITAUQ + N
1369: IWORK = ITAUP + N
1370: *
1371: * Bidiagonalize R in WORK(IU), copying result to VT
1372: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1373: *
1374: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1375: $ WORK( IE ), WORK( ITAUQ ),
1376: $ WORK( ITAUP ), WORK( IWORK ),
1377: $ LWORK-IWORK+1, IERR )
1378: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1379: $ LDVT )
1380: *
1381: * Generate left bidiagonalizing vectors in WORK(IU)
1382: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1383: *
1384: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1385: $ WORK( ITAUQ ), WORK( IWORK ),
1386: $ LWORK-IWORK+1, IERR )
1387: *
1388: * Generate right bidiagonalizing vectors in VT
1389: * (Workspace: need N*N+4*N-1,
1390: * prefer N*N+3*N+(N-1)*NB)
1391: *
1392: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1393: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1394: IWORK = IE + N
1395: *
1396: * Perform bidiagonal QR iteration, computing left
1397: * singular vectors of R in WORK(IU) and computing
1398: * right singular vectors of R in VT
1399: * (Workspace: need N*N+BDSPAC)
1400: *
1401: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1402: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1403: $ WORK( IWORK ), INFO )
1404: *
1405: * Multiply Q in A by left singular vectors of R in
1406: * WORK(IU), storing result in U
1407: * (Workspace: need N*N)
1408: *
1409: CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1410: $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1411: *
1412: ELSE
1413: *
1414: * Insufficient workspace for a fast algorithm
1415: *
1416: ITAU = 1
1417: IWORK = ITAU + N
1418: *
1419: * Compute A=Q*R, copying result to U
1420: * (Workspace: need 2*N, prefer N+N*NB)
1421: *
1422: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1423: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1424: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1425: *
1426: * Generate Q in U
1427: * (Workspace: need 2*N, prefer N+N*NB)
1428: *
1429: CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1430: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1431: *
1432: * Copy R to VT, zeroing out below it
1433: *
1434: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1435: IF( N.GT.1 )
1436: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1437: $ VT( 2, 1 ), LDVT )
1438: IE = ITAU
1439: ITAUQ = IE + N
1440: ITAUP = ITAUQ + N
1441: IWORK = ITAUP + N
1442: *
1443: * Bidiagonalize R in VT
1444: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1445: *
1446: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1447: $ WORK( ITAUQ ), WORK( ITAUP ),
1448: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1449: *
1450: * Multiply Q in U by left bidiagonalizing vectors
1451: * in VT
1452: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1453: *
1454: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1455: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1456: $ LWORK-IWORK+1, IERR )
1457: *
1458: * Generate right bidiagonalizing vectors in VT
1459: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1460: *
1461: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1462: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1463: IWORK = IE + N
1464: *
1465: * Perform bidiagonal QR iteration, computing left
1466: * singular vectors of A in U and computing right
1467: * singular vectors of A in VT
1468: * (Workspace: need BDSPAC)
1469: *
1470: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1471: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1472: $ INFO )
1473: *
1474: END IF
1475: *
1476: END IF
1477: *
1478: ELSE IF( WNTUA ) THEN
1479: *
1480: IF( WNTVN ) THEN
1481: *
1482: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1483: * M left singular vectors to be computed in U and
1484: * no right singular vectors to be computed
1485: *
1486: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1487: *
1488: * Sufficient workspace for a fast algorithm
1489: *
1490: IR = 1
1491: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1492: *
1493: * WORK(IR) is LDA by N
1494: *
1495: LDWRKR = LDA
1496: ELSE
1497: *
1498: * WORK(IR) is N by N
1499: *
1500: LDWRKR = N
1501: END IF
1502: ITAU = IR + LDWRKR*N
1503: IWORK = ITAU + N
1504: *
1505: * Compute A=Q*R, copying result to U
1506: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1507: *
1508: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1509: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1510: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1511: *
1512: * Copy R to WORK(IR), zeroing out below it
1513: *
1514: CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1515: $ LDWRKR )
1516: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1517: $ WORK( IR+1 ), LDWRKR )
1518: *
1519: * Generate Q in U
1520: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1521: *
1522: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1523: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1524: IE = ITAU
1525: ITAUQ = IE + N
1526: ITAUP = ITAUQ + N
1527: IWORK = ITAUP + N
1528: *
1529: * Bidiagonalize R in WORK(IR)
1530: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1531: *
1532: CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1533: $ WORK( IE ), WORK( ITAUQ ),
1534: $ WORK( ITAUP ), WORK( IWORK ),
1535: $ LWORK-IWORK+1, IERR )
1536: *
1537: * Generate left bidiagonalizing vectors in WORK(IR)
1538: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1539: *
1540: CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1541: $ WORK( ITAUQ ), WORK( IWORK ),
1542: $ LWORK-IWORK+1, IERR )
1543: IWORK = IE + N
1544: *
1545: * Perform bidiagonal QR iteration, computing left
1546: * singular vectors of R in WORK(IR)
1547: * (Workspace: need N*N+BDSPAC)
1548: *
1549: CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1550: $ 1, WORK( IR ), LDWRKR, DUM, 1,
1551: $ WORK( IWORK ), INFO )
1552: *
1553: * Multiply Q in U by left singular vectors of R in
1554: * WORK(IR), storing result in A
1555: * (Workspace: need N*N)
1556: *
1557: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1558: $ WORK( IR ), LDWRKR, ZERO, A, LDA )
1559: *
1560: * Copy left singular vectors of A from A to U
1561: *
1562: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1563: *
1564: ELSE
1565: *
1566: * Insufficient workspace for a fast algorithm
1567: *
1568: ITAU = 1
1569: IWORK = ITAU + N
1570: *
1571: * Compute A=Q*R, copying result to U
1572: * (Workspace: need 2*N, prefer N+N*NB)
1573: *
1574: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1575: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1576: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1577: *
1578: * Generate Q in U
1579: * (Workspace: need N+M, prefer N+M*NB)
1580: *
1581: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1582: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1583: IE = ITAU
1584: ITAUQ = IE + N
1585: ITAUP = ITAUQ + N
1586: IWORK = ITAUP + N
1587: *
1588: * Zero out below R in A
1589: *
1590: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1591: $ LDA )
1592: *
1593: * Bidiagonalize R in A
1594: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1595: *
1596: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1597: $ WORK( ITAUQ ), WORK( ITAUP ),
1598: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1599: *
1600: * Multiply Q in U by left bidiagonalizing vectors
1601: * in A
1602: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1603: *
1604: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1605: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1606: $ LWORK-IWORK+1, IERR )
1607: IWORK = IE + N
1608: *
1609: * Perform bidiagonal QR iteration, computing left
1610: * singular vectors of A in U
1611: * (Workspace: need BDSPAC)
1612: *
1613: CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1614: $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1615: $ INFO )
1616: *
1617: END IF
1618: *
1619: ELSE IF( WNTVO ) THEN
1620: *
1621: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1622: * M left singular vectors to be computed in U and
1623: * N right singular vectors to be overwritten on A
1624: *
1625: IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1626: *
1627: * Sufficient workspace for a fast algorithm
1628: *
1629: IU = 1
1630: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1631: *
1632: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1633: *
1634: LDWRKU = LDA
1635: IR = IU + LDWRKU*N
1636: LDWRKR = LDA
1637: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1638: *
1639: * WORK(IU) is LDA by N and WORK(IR) is N by N
1640: *
1641: LDWRKU = LDA
1642: IR = IU + LDWRKU*N
1643: LDWRKR = N
1644: ELSE
1645: *
1646: * WORK(IU) is N by N and WORK(IR) is N by N
1647: *
1648: LDWRKU = N
1649: IR = IU + LDWRKU*N
1650: LDWRKR = N
1651: END IF
1652: ITAU = IR + LDWRKR*N
1653: IWORK = ITAU + N
1654: *
1655: * Compute A=Q*R, copying result to U
1656: * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1657: *
1658: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1659: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1660: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1661: *
1662: * Generate Q in U
1663: * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1664: *
1665: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1666: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1667: *
1668: * Copy R to WORK(IU), zeroing out below it
1669: *
1670: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1671: $ LDWRKU )
1672: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1673: $ WORK( IU+1 ), LDWRKU )
1674: IE = ITAU
1675: ITAUQ = IE + N
1676: ITAUP = ITAUQ + N
1677: IWORK = ITAUP + N
1678: *
1679: * Bidiagonalize R in WORK(IU), copying result to
1680: * WORK(IR)
1681: * (Workspace: need 2*N*N+4*N,
1682: * prefer 2*N*N+3*N+2*N*NB)
1683: *
1684: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1685: $ WORK( IE ), WORK( ITAUQ ),
1686: $ WORK( ITAUP ), WORK( IWORK ),
1687: $ LWORK-IWORK+1, IERR )
1688: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1689: $ WORK( IR ), LDWRKR )
1690: *
1691: * Generate left bidiagonalizing vectors in WORK(IU)
1692: * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1693: *
1694: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1695: $ WORK( ITAUQ ), WORK( IWORK ),
1696: $ LWORK-IWORK+1, IERR )
1697: *
1698: * Generate right bidiagonalizing vectors in WORK(IR)
1699: * (Workspace: need 2*N*N+4*N-1,
1700: * prefer 2*N*N+3*N+(N-1)*NB)
1701: *
1702: CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1703: $ WORK( ITAUP ), WORK( IWORK ),
1704: $ LWORK-IWORK+1, IERR )
1705: IWORK = IE + N
1706: *
1707: * Perform bidiagonal QR iteration, computing left
1708: * singular vectors of R in WORK(IU) and computing
1709: * right singular vectors of R in WORK(IR)
1710: * (Workspace: need 2*N*N+BDSPAC)
1711: *
1712: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1713: $ WORK( IR ), LDWRKR, WORK( IU ),
1714: $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1715: *
1716: * Multiply Q in U by left singular vectors of R in
1717: * WORK(IU), storing result in A
1718: * (Workspace: need N*N)
1719: *
1720: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1721: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1722: *
1723: * Copy left singular vectors of A from A to U
1724: *
1725: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1726: *
1727: * Copy right singular vectors of R from WORK(IR) to A
1728: *
1729: CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1730: $ LDA )
1731: *
1732: ELSE
1733: *
1734: * Insufficient workspace for a fast algorithm
1735: *
1736: ITAU = 1
1737: IWORK = ITAU + N
1738: *
1739: * Compute A=Q*R, copying result to U
1740: * (Workspace: need 2*N, prefer N+N*NB)
1741: *
1742: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1743: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1744: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1745: *
1746: * Generate Q in U
1747: * (Workspace: need N+M, prefer N+M*NB)
1748: *
1749: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1750: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1751: IE = ITAU
1752: ITAUQ = IE + N
1753: ITAUP = ITAUQ + N
1754: IWORK = ITAUP + N
1755: *
1756: * Zero out below R in A
1757: *
1758: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1759: $ LDA )
1760: *
1761: * Bidiagonalize R in A
1762: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1763: *
1764: CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1765: $ WORK( ITAUQ ), WORK( ITAUP ),
1766: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1767: *
1768: * Multiply Q in U by left bidiagonalizing vectors
1769: * in A
1770: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1771: *
1772: CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1773: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1774: $ LWORK-IWORK+1, IERR )
1775: *
1776: * Generate right bidiagonalizing vectors in A
1777: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1778: *
1779: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1780: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1781: IWORK = IE + N
1782: *
1783: * Perform bidiagonal QR iteration, computing left
1784: * singular vectors of A in U and computing right
1785: * singular vectors of A in A
1786: * (Workspace: need BDSPAC)
1787: *
1788: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1789: $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1790: $ INFO )
1791: *
1792: END IF
1793: *
1794: ELSE IF( WNTVAS ) THEN
1795: *
1796: * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1797: * or 'A')
1798: * M left singular vectors to be computed in U and
1799: * N right singular vectors to be computed in VT
1800: *
1801: IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1802: *
1803: * Sufficient workspace for a fast algorithm
1804: *
1805: IU = 1
1806: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1807: *
1808: * WORK(IU) is LDA by N
1809: *
1810: LDWRKU = LDA
1811: ELSE
1812: *
1813: * WORK(IU) is N by N
1814: *
1815: LDWRKU = N
1816: END IF
1817: ITAU = IU + LDWRKU*N
1818: IWORK = ITAU + N
1819: *
1820: * Compute A=Q*R, copying result to U
1821: * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1822: *
1823: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1824: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1825: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1826: *
1827: * Generate Q in U
1828: * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1829: *
1830: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1831: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1832: *
1833: * Copy R to WORK(IU), zeroing out below it
1834: *
1835: CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1836: $ LDWRKU )
1837: CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1838: $ WORK( IU+1 ), LDWRKU )
1839: IE = ITAU
1840: ITAUQ = IE + N
1841: ITAUP = ITAUQ + N
1842: IWORK = ITAUP + N
1843: *
1844: * Bidiagonalize R in WORK(IU), copying result to VT
1845: * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1846: *
1847: CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1848: $ WORK( IE ), WORK( ITAUQ ),
1849: $ WORK( ITAUP ), WORK( IWORK ),
1850: $ LWORK-IWORK+1, IERR )
1851: CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1852: $ LDVT )
1853: *
1854: * Generate left bidiagonalizing vectors in WORK(IU)
1855: * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1856: *
1857: CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1858: $ WORK( ITAUQ ), WORK( IWORK ),
1859: $ LWORK-IWORK+1, IERR )
1860: *
1861: * Generate right bidiagonalizing vectors in VT
1862: * (Workspace: need N*N+4*N-1,
1863: * prefer N*N+3*N+(N-1)*NB)
1864: *
1865: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1866: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1867: IWORK = IE + N
1868: *
1869: * Perform bidiagonal QR iteration, computing left
1870: * singular vectors of R in WORK(IU) and computing
1871: * right singular vectors of R in VT
1872: * (Workspace: need N*N+BDSPAC)
1873: *
1874: CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1875: $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1876: $ WORK( IWORK ), INFO )
1877: *
1878: * Multiply Q in U by left singular vectors of R in
1879: * WORK(IU), storing result in A
1880: * (Workspace: need N*N)
1881: *
1882: CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1883: $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1884: *
1885: * Copy left singular vectors of A from A to U
1886: *
1887: CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1888: *
1889: ELSE
1890: *
1891: * Insufficient workspace for a fast algorithm
1892: *
1893: ITAU = 1
1894: IWORK = ITAU + N
1895: *
1896: * Compute A=Q*R, copying result to U
1897: * (Workspace: need 2*N, prefer N+N*NB)
1898: *
1899: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1900: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1901: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1902: *
1903: * Generate Q in U
1904: * (Workspace: need N+M, prefer N+M*NB)
1905: *
1906: CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1907: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1908: *
1909: * Copy R from A to VT, zeroing out below it
1910: *
1911: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1912: IF( N.GT.1 )
1913: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1914: $ VT( 2, 1 ), LDVT )
1915: IE = ITAU
1916: ITAUQ = IE + N
1917: ITAUP = ITAUQ + N
1918: IWORK = ITAUP + N
1919: *
1920: * Bidiagonalize R in VT
1921: * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1922: *
1923: CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1924: $ WORK( ITAUQ ), WORK( ITAUP ),
1925: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1926: *
1927: * Multiply Q in U by left bidiagonalizing vectors
1928: * in VT
1929: * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1930: *
1931: CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1932: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1933: $ LWORK-IWORK+1, IERR )
1934: *
1935: * Generate right bidiagonalizing vectors in VT
1936: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1937: *
1938: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1939: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1940: IWORK = IE + N
1941: *
1942: * Perform bidiagonal QR iteration, computing left
1943: * singular vectors of A in U and computing right
1944: * singular vectors of A in VT
1945: * (Workspace: need BDSPAC)
1946: *
1947: CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1948: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1949: $ INFO )
1950: *
1951: END IF
1952: *
1953: END IF
1954: *
1955: END IF
1956: *
1957: ELSE
1958: *
1959: * M .LT. MNTHR
1960: *
1961: * Path 10 (M at least N, but not much larger)
1962: * Reduce to bidiagonal form without QR decomposition
1963: *
1964: IE = 1
1965: ITAUQ = IE + N
1966: ITAUP = ITAUQ + N
1967: IWORK = ITAUP + N
1968: *
1969: * Bidiagonalize A
1970: * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1971: *
1972: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1973: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1974: $ IERR )
1975: IF( WNTUAS ) THEN
1976: *
1977: * If left singular vectors desired in U, copy result to U
1978: * and generate left bidiagonalizing vectors in U
1979: * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1980: *
1981: CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1982: IF( WNTUS )
1983: $ NCU = N
1984: IF( WNTUA )
1985: $ NCU = M
1986: CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1987: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1988: END IF
1989: IF( WNTVAS ) THEN
1990: *
1991: * If right singular vectors desired in VT, copy result to
1992: * VT and generate right bidiagonalizing vectors in VT
1993: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1994: *
1995: CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1996: CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1997: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1998: END IF
1999: IF( WNTUO ) THEN
2000: *
2001: * If left singular vectors desired in A, generate left
2002: * bidiagonalizing vectors in A
2003: * (Workspace: need 4*N, prefer 3*N+N*NB)
2004: *
2005: CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2006: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2007: END IF
2008: IF( WNTVO ) THEN
2009: *
2010: * If right singular vectors desired in A, generate right
2011: * bidiagonalizing vectors in A
2012: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2013: *
2014: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2015: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2016: END IF
2017: IWORK = IE + N
2018: IF( WNTUAS .OR. WNTUO )
2019: $ NRU = M
2020: IF( WNTUN )
2021: $ NRU = 0
2022: IF( WNTVAS .OR. WNTVO )
2023: $ NCVT = N
2024: IF( WNTVN )
2025: $ NCVT = 0
2026: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2027: *
2028: * Perform bidiagonal QR iteration, if desired, computing
2029: * left singular vectors in U and computing right singular
2030: * vectors in VT
2031: * (Workspace: need BDSPAC)
2032: *
2033: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2034: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
2035: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2036: *
2037: * Perform bidiagonal QR iteration, if desired, computing
2038: * left singular vectors in U and computing right singular
2039: * vectors in A
2040: * (Workspace: need BDSPAC)
2041: *
2042: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
2043: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2044: ELSE
2045: *
2046: * Perform bidiagonal QR iteration, if desired, computing
2047: * left singular vectors in A and computing right singular
2048: * vectors in VT
2049: * (Workspace: need BDSPAC)
2050: *
2051: CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2052: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
2053: END IF
2054: *
2055: END IF
2056: *
2057: ELSE
2058: *
2059: * A has more columns than rows. If A has sufficiently more
2060: * columns than rows, first reduce using the LQ decomposition (if
2061: * sufficient workspace available)
2062: *
2063: IF( N.GE.MNTHR ) THEN
2064: *
2065: IF( WNTVN ) THEN
2066: *
2067: * Path 1t(N much larger than M, JOBVT='N')
2068: * No right singular vectors to be computed
2069: *
2070: ITAU = 1
2071: IWORK = ITAU + M
2072: *
2073: * Compute A=L*Q
2074: * (Workspace: need 2*M, prefer M+M*NB)
2075: *
2076: CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2077: $ LWORK-IWORK+1, IERR )
2078: *
2079: * Zero out above L
2080: *
2081: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2082: IE = 1
2083: ITAUQ = IE + M
2084: ITAUP = ITAUQ + M
2085: IWORK = ITAUP + M
2086: *
2087: * Bidiagonalize L in A
2088: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2089: *
2090: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2091: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2092: $ IERR )
2093: IF( WNTUO .OR. WNTUAS ) THEN
2094: *
2095: * If left singular vectors desired, generate Q
2096: * (Workspace: need 4*M, prefer 3*M+M*NB)
2097: *
2098: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2099: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2100: END IF
2101: IWORK = IE + M
2102: NRU = 0
2103: IF( WNTUO .OR. WNTUAS )
2104: $ NRU = M
2105: *
2106: * Perform bidiagonal QR iteration, computing left singular
2107: * vectors of A in A if desired
2108: * (Workspace: need BDSPAC)
2109: *
2110: CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2111: $ LDA, DUM, 1, WORK( IWORK ), INFO )
2112: *
2113: * If left singular vectors desired in U, copy them there
2114: *
2115: IF( WNTUAS )
2116: $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2117: *
2118: ELSE IF( WNTVO .AND. WNTUN ) THEN
2119: *
2120: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2121: * M right singular vectors to be overwritten on A and
2122: * no left singular vectors to be computed
2123: *
2124: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2125: *
2126: * Sufficient workspace for a fast algorithm
2127: *
2128: IR = 1
2129: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2130: *
2131: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2132: *
2133: LDWRKU = LDA
2134: CHUNK = N
2135: LDWRKR = LDA
2136: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2137: *
2138: * WORK(IU) is LDA by N and WORK(IR) is M by M
2139: *
2140: LDWRKU = LDA
2141: CHUNK = N
2142: LDWRKR = M
2143: ELSE
2144: *
2145: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2146: *
2147: LDWRKU = M
2148: CHUNK = ( LWORK-M*M-M ) / M
2149: LDWRKR = M
2150: END IF
2151: ITAU = IR + LDWRKR*M
2152: IWORK = ITAU + M
2153: *
2154: * Compute A=L*Q
2155: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2156: *
2157: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2158: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2159: *
2160: * Copy L to WORK(IR) and zero out above it
2161: *
2162: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2163: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2164: $ WORK( IR+LDWRKR ), LDWRKR )
2165: *
2166: * Generate Q in A
2167: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2168: *
2169: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2170: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2171: IE = ITAU
2172: ITAUQ = IE + M
2173: ITAUP = ITAUQ + M
2174: IWORK = ITAUP + M
2175: *
2176: * Bidiagonalize L in WORK(IR)
2177: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2178: *
2179: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2180: $ WORK( ITAUQ ), WORK( ITAUP ),
2181: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2182: *
2183: * Generate right vectors bidiagonalizing L
2184: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2185: *
2186: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2187: $ WORK( ITAUP ), WORK( IWORK ),
2188: $ LWORK-IWORK+1, IERR )
2189: IWORK = IE + M
2190: *
2191: * Perform bidiagonal QR iteration, computing right
2192: * singular vectors of L in WORK(IR)
2193: * (Workspace: need M*M+BDSPAC)
2194: *
2195: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2196: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2197: $ WORK( IWORK ), INFO )
2198: IU = IE + M
2199: *
2200: * Multiply right singular vectors of L in WORK(IR) by Q
2201: * in A, storing result in WORK(IU) and copying to A
2202: * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2203: *
2204: DO 30 I = 1, N, CHUNK
2205: BLK = MIN( N-I+1, CHUNK )
2206: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2207: $ LDWRKR, A( 1, I ), LDA, ZERO,
2208: $ WORK( IU ), LDWRKU )
2209: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2210: $ A( 1, I ), LDA )
2211: 30 CONTINUE
2212: *
2213: ELSE
2214: *
2215: * Insufficient workspace for a fast algorithm
2216: *
2217: IE = 1
2218: ITAUQ = IE + M
2219: ITAUP = ITAUQ + M
2220: IWORK = ITAUP + M
2221: *
2222: * Bidiagonalize A
2223: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2224: *
2225: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2226: $ WORK( ITAUQ ), WORK( ITAUP ),
2227: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2228: *
2229: * Generate right vectors bidiagonalizing A
2230: * (Workspace: need 4*M, prefer 3*M+M*NB)
2231: *
2232: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2233: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2234: IWORK = IE + M
2235: *
2236: * Perform bidiagonal QR iteration, computing right
2237: * singular vectors of A in A
2238: * (Workspace: need BDSPAC)
2239: *
2240: CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2241: $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2242: *
2243: END IF
2244: *
2245: ELSE IF( WNTVO .AND. WNTUAS ) THEN
2246: *
2247: * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2248: * M right singular vectors to be overwritten on A and
2249: * M left singular vectors to be computed in U
2250: *
2251: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2252: *
2253: * Sufficient workspace for a fast algorithm
2254: *
2255: IR = 1
2256: IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2257: *
2258: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2259: *
2260: LDWRKU = LDA
2261: CHUNK = N
2262: LDWRKR = LDA
2263: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2264: *
2265: * WORK(IU) is LDA by N and WORK(IR) is M by M
2266: *
2267: LDWRKU = LDA
2268: CHUNK = N
2269: LDWRKR = M
2270: ELSE
2271: *
2272: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2273: *
2274: LDWRKU = M
2275: CHUNK = ( LWORK-M*M-M ) / M
2276: LDWRKR = M
2277: END IF
2278: ITAU = IR + LDWRKR*M
2279: IWORK = ITAU + M
2280: *
2281: * Compute A=L*Q
2282: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2283: *
2284: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2285: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2286: *
2287: * Copy L to U, zeroing about above it
2288: *
2289: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2290: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2291: $ LDU )
2292: *
2293: * Generate Q in A
2294: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2295: *
2296: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2297: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2298: IE = ITAU
2299: ITAUQ = IE + M
2300: ITAUP = ITAUQ + M
2301: IWORK = ITAUP + M
2302: *
2303: * Bidiagonalize L in U, copying result to WORK(IR)
2304: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2305: *
2306: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2307: $ WORK( ITAUQ ), WORK( ITAUP ),
2308: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2309: CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2310: *
2311: * Generate right vectors bidiagonalizing L in WORK(IR)
2312: * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2313: *
2314: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2315: $ WORK( ITAUP ), WORK( IWORK ),
2316: $ LWORK-IWORK+1, IERR )
2317: *
2318: * Generate left vectors bidiagonalizing L in U
2319: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2320: *
2321: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2322: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2323: IWORK = IE + M
2324: *
2325: * Perform bidiagonal QR iteration, computing left
2326: * singular vectors of L in U, and computing right
2327: * singular vectors of L in WORK(IR)
2328: * (Workspace: need M*M+BDSPAC)
2329: *
2330: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2331: $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2332: $ WORK( IWORK ), INFO )
2333: IU = IE + M
2334: *
2335: * Multiply right singular vectors of L in WORK(IR) by Q
2336: * in A, storing result in WORK(IU) and copying to A
2337: * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2338: *
2339: DO 40 I = 1, N, CHUNK
2340: BLK = MIN( N-I+1, CHUNK )
2341: CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2342: $ LDWRKR, A( 1, I ), LDA, ZERO,
2343: $ WORK( IU ), LDWRKU )
2344: CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2345: $ A( 1, I ), LDA )
2346: 40 CONTINUE
2347: *
2348: ELSE
2349: *
2350: * Insufficient workspace for a fast algorithm
2351: *
2352: ITAU = 1
2353: IWORK = ITAU + M
2354: *
2355: * Compute A=L*Q
2356: * (Workspace: need 2*M, prefer M+M*NB)
2357: *
2358: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2359: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2360: *
2361: * Copy L to U, zeroing out above it
2362: *
2363: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2364: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2365: $ LDU )
2366: *
2367: * Generate Q in A
2368: * (Workspace: need 2*M, prefer M+M*NB)
2369: *
2370: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2371: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2372: IE = ITAU
2373: ITAUQ = IE + M
2374: ITAUP = ITAUQ + M
2375: IWORK = ITAUP + M
2376: *
2377: * Bidiagonalize L in U
2378: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2379: *
2380: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2381: $ WORK( ITAUQ ), WORK( ITAUP ),
2382: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2383: *
2384: * Multiply right vectors bidiagonalizing L by Q in A
2385: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2386: *
2387: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2388: $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2389: $ LWORK-IWORK+1, IERR )
2390: *
2391: * Generate left vectors bidiagonalizing L in U
2392: * (Workspace: need 4*M, prefer 3*M+M*NB)
2393: *
2394: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2395: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2396: IWORK = IE + M
2397: *
2398: * Perform bidiagonal QR iteration, computing left
2399: * singular vectors of A in U and computing right
2400: * singular vectors of A in A
2401: * (Workspace: need BDSPAC)
2402: *
2403: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2404: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2405: *
2406: END IF
2407: *
2408: ELSE IF( WNTVS ) THEN
2409: *
2410: IF( WNTUN ) THEN
2411: *
2412: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2413: * M right singular vectors to be computed in VT and
2414: * no left singular vectors to be computed
2415: *
2416: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2417: *
2418: * Sufficient workspace for a fast algorithm
2419: *
2420: IR = 1
2421: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2422: *
2423: * WORK(IR) is LDA by M
2424: *
2425: LDWRKR = LDA
2426: ELSE
2427: *
2428: * WORK(IR) is M by M
2429: *
2430: LDWRKR = M
2431: END IF
2432: ITAU = IR + LDWRKR*M
2433: IWORK = ITAU + M
2434: *
2435: * Compute A=L*Q
2436: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2437: *
2438: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2439: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2440: *
2441: * Copy L to WORK(IR), zeroing out above it
2442: *
2443: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2444: $ LDWRKR )
2445: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2446: $ WORK( IR+LDWRKR ), LDWRKR )
2447: *
2448: * Generate Q in A
2449: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2450: *
2451: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2452: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2453: IE = ITAU
2454: ITAUQ = IE + M
2455: ITAUP = ITAUQ + M
2456: IWORK = ITAUP + M
2457: *
2458: * Bidiagonalize L in WORK(IR)
2459: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2460: *
2461: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2462: $ WORK( IE ), WORK( ITAUQ ),
2463: $ WORK( ITAUP ), WORK( IWORK ),
2464: $ LWORK-IWORK+1, IERR )
2465: *
2466: * Generate right vectors bidiagonalizing L in
2467: * WORK(IR)
2468: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2469: *
2470: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2471: $ WORK( ITAUP ), WORK( IWORK ),
2472: $ LWORK-IWORK+1, IERR )
2473: IWORK = IE + M
2474: *
2475: * Perform bidiagonal QR iteration, computing right
2476: * singular vectors of L in WORK(IR)
2477: * (Workspace: need M*M+BDSPAC)
2478: *
2479: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2480: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2481: $ WORK( IWORK ), INFO )
2482: *
2483: * Multiply right singular vectors of L in WORK(IR) by
2484: * Q in A, storing result in VT
2485: * (Workspace: need M*M)
2486: *
2487: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2488: $ LDWRKR, A, LDA, ZERO, VT, LDVT )
2489: *
2490: ELSE
2491: *
2492: * Insufficient workspace for a fast algorithm
2493: *
2494: ITAU = 1
2495: IWORK = ITAU + M
2496: *
2497: * Compute A=L*Q
2498: * (Workspace: need 2*M, prefer M+M*NB)
2499: *
2500: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2501: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2502: *
2503: * Copy result to VT
2504: *
2505: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2506: *
2507: * Generate Q in VT
2508: * (Workspace: need 2*M, prefer M+M*NB)
2509: *
2510: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2511: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2512: IE = ITAU
2513: ITAUQ = IE + M
2514: ITAUP = ITAUQ + M
2515: IWORK = ITAUP + M
2516: *
2517: * Zero out above L in A
2518: *
2519: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2520: $ LDA )
2521: *
2522: * Bidiagonalize L in A
2523: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2524: *
2525: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2526: $ WORK( ITAUQ ), WORK( ITAUP ),
2527: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2528: *
2529: * Multiply right vectors bidiagonalizing L by Q in VT
2530: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2531: *
2532: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2533: $ WORK( ITAUP ), VT, LDVT,
2534: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2535: IWORK = IE + M
2536: *
2537: * Perform bidiagonal QR iteration, computing right
2538: * singular vectors of A in VT
2539: * (Workspace: need BDSPAC)
2540: *
2541: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2542: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2543: $ INFO )
2544: *
2545: END IF
2546: *
2547: ELSE IF( WNTUO ) THEN
2548: *
2549: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2550: * M right singular vectors to be computed in VT and
2551: * M left singular vectors to be overwritten on A
2552: *
2553: IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2554: *
2555: * Sufficient workspace for a fast algorithm
2556: *
2557: IU = 1
2558: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2559: *
2560: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2561: *
2562: LDWRKU = LDA
2563: IR = IU + LDWRKU*M
2564: LDWRKR = LDA
2565: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2566: *
2567: * WORK(IU) is LDA by M and WORK(IR) is M by M
2568: *
2569: LDWRKU = LDA
2570: IR = IU + LDWRKU*M
2571: LDWRKR = M
2572: ELSE
2573: *
2574: * WORK(IU) is M by M and WORK(IR) is M by M
2575: *
2576: LDWRKU = M
2577: IR = IU + LDWRKU*M
2578: LDWRKR = M
2579: END IF
2580: ITAU = IR + LDWRKR*M
2581: IWORK = ITAU + M
2582: *
2583: * Compute A=L*Q
2584: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2585: *
2586: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2587: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2588: *
2589: * Copy L to WORK(IU), zeroing out below it
2590: *
2591: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2592: $ LDWRKU )
2593: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2594: $ WORK( IU+LDWRKU ), LDWRKU )
2595: *
2596: * Generate Q in A
2597: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2598: *
2599: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2600: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2601: IE = ITAU
2602: ITAUQ = IE + M
2603: ITAUP = ITAUQ + M
2604: IWORK = ITAUP + M
2605: *
2606: * Bidiagonalize L in WORK(IU), copying result to
2607: * WORK(IR)
2608: * (Workspace: need 2*M*M+4*M,
2609: * prefer 2*M*M+3*M+2*M*NB)
2610: *
2611: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2612: $ WORK( IE ), WORK( ITAUQ ),
2613: $ WORK( ITAUP ), WORK( IWORK ),
2614: $ LWORK-IWORK+1, IERR )
2615: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2616: $ WORK( IR ), LDWRKR )
2617: *
2618: * Generate right bidiagonalizing vectors in WORK(IU)
2619: * (Workspace: need 2*M*M+4*M-1,
2620: * prefer 2*M*M+3*M+(M-1)*NB)
2621: *
2622: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2623: $ WORK( ITAUP ), WORK( IWORK ),
2624: $ LWORK-IWORK+1, IERR )
2625: *
2626: * Generate left bidiagonalizing vectors in WORK(IR)
2627: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2628: *
2629: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2630: $ WORK( ITAUQ ), WORK( IWORK ),
2631: $ LWORK-IWORK+1, IERR )
2632: IWORK = IE + M
2633: *
2634: * Perform bidiagonal QR iteration, computing left
2635: * singular vectors of L in WORK(IR) and computing
2636: * right singular vectors of L in WORK(IU)
2637: * (Workspace: need 2*M*M+BDSPAC)
2638: *
2639: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2640: $ WORK( IU ), LDWRKU, WORK( IR ),
2641: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2642: *
2643: * Multiply right singular vectors of L in WORK(IU) by
2644: * Q in A, storing result in VT
2645: * (Workspace: need M*M)
2646: *
2647: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2648: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2649: *
2650: * Copy left singular vectors of L to A
2651: * (Workspace: need M*M)
2652: *
2653: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2654: $ LDA )
2655: *
2656: ELSE
2657: *
2658: * Insufficient workspace for a fast algorithm
2659: *
2660: ITAU = 1
2661: IWORK = ITAU + M
2662: *
2663: * Compute A=L*Q, copying result to VT
2664: * (Workspace: need 2*M, prefer M+M*NB)
2665: *
2666: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2667: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2668: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2669: *
2670: * Generate Q in VT
2671: * (Workspace: need 2*M, prefer M+M*NB)
2672: *
2673: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2674: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2675: IE = ITAU
2676: ITAUQ = IE + M
2677: ITAUP = ITAUQ + M
2678: IWORK = ITAUP + M
2679: *
2680: * Zero out above L in A
2681: *
2682: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2683: $ LDA )
2684: *
2685: * Bidiagonalize L in A
2686: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2687: *
2688: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2689: $ WORK( ITAUQ ), WORK( ITAUP ),
2690: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2691: *
2692: * Multiply right vectors bidiagonalizing L by Q in VT
2693: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2694: *
2695: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2696: $ WORK( ITAUP ), VT, LDVT,
2697: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2698: *
2699: * Generate left bidiagonalizing vectors of L in A
2700: * (Workspace: need 4*M, prefer 3*M+M*NB)
2701: *
2702: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2703: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2704: IWORK = IE + M
2705: *
2706: * Perform bidiagonal QR iteration, compute left
2707: * singular vectors of A in A and compute right
2708: * singular vectors of A in VT
2709: * (Workspace: need BDSPAC)
2710: *
2711: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2712: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2713: $ INFO )
2714: *
2715: END IF
2716: *
2717: ELSE IF( WNTUAS ) THEN
2718: *
2719: * Path 6t(N much larger than M, JOBU='S' or 'A',
2720: * JOBVT='S')
2721: * M right singular vectors to be computed in VT and
2722: * M left singular vectors to be computed in U
2723: *
2724: IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2725: *
2726: * Sufficient workspace for a fast algorithm
2727: *
2728: IU = 1
2729: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2730: *
2731: * WORK(IU) is LDA by N
2732: *
2733: LDWRKU = LDA
2734: ELSE
2735: *
2736: * WORK(IU) is LDA by M
2737: *
2738: LDWRKU = M
2739: END IF
2740: ITAU = IU + LDWRKU*M
2741: IWORK = ITAU + M
2742: *
2743: * Compute A=L*Q
2744: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2745: *
2746: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2747: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2748: *
2749: * Copy L to WORK(IU), zeroing out above it
2750: *
2751: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2752: $ LDWRKU )
2753: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2754: $ WORK( IU+LDWRKU ), LDWRKU )
2755: *
2756: * Generate Q in A
2757: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2758: *
2759: CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2760: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2761: IE = ITAU
2762: ITAUQ = IE + M
2763: ITAUP = ITAUQ + M
2764: IWORK = ITAUP + M
2765: *
2766: * Bidiagonalize L in WORK(IU), copying result to U
2767: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2768: *
2769: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2770: $ WORK( IE ), WORK( ITAUQ ),
2771: $ WORK( ITAUP ), WORK( IWORK ),
2772: $ LWORK-IWORK+1, IERR )
2773: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2774: $ LDU )
2775: *
2776: * Generate right bidiagonalizing vectors in WORK(IU)
2777: * (Workspace: need M*M+4*M-1,
2778: * prefer M*M+3*M+(M-1)*NB)
2779: *
2780: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2781: $ WORK( ITAUP ), WORK( IWORK ),
2782: $ LWORK-IWORK+1, IERR )
2783: *
2784: * Generate left bidiagonalizing vectors in U
2785: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2786: *
2787: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2788: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2789: IWORK = IE + M
2790: *
2791: * Perform bidiagonal QR iteration, computing left
2792: * singular vectors of L in U and computing right
2793: * singular vectors of L in WORK(IU)
2794: * (Workspace: need M*M+BDSPAC)
2795: *
2796: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2797: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2798: $ WORK( IWORK ), INFO )
2799: *
2800: * Multiply right singular vectors of L in WORK(IU) by
2801: * Q in A, storing result in VT
2802: * (Workspace: need M*M)
2803: *
2804: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2805: $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2806: *
2807: ELSE
2808: *
2809: * Insufficient workspace for a fast algorithm
2810: *
2811: ITAU = 1
2812: IWORK = ITAU + M
2813: *
2814: * Compute A=L*Q, copying result to VT
2815: * (Workspace: need 2*M, prefer M+M*NB)
2816: *
2817: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2818: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2819: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2820: *
2821: * Generate Q in VT
2822: * (Workspace: need 2*M, prefer M+M*NB)
2823: *
2824: CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2825: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2826: *
2827: * Copy L to U, zeroing out above it
2828: *
2829: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2830: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2831: $ LDU )
2832: IE = ITAU
2833: ITAUQ = IE + M
2834: ITAUP = ITAUQ + M
2835: IWORK = ITAUP + M
2836: *
2837: * Bidiagonalize L in U
2838: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2839: *
2840: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2841: $ WORK( ITAUQ ), WORK( ITAUP ),
2842: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2843: *
2844: * Multiply right bidiagonalizing vectors in U by Q
2845: * in VT
2846: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2847: *
2848: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2849: $ WORK( ITAUP ), VT, LDVT,
2850: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2851: *
2852: * Generate left bidiagonalizing vectors in U
2853: * (Workspace: need 4*M, prefer 3*M+M*NB)
2854: *
2855: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2856: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2857: IWORK = IE + M
2858: *
2859: * Perform bidiagonal QR iteration, computing left
2860: * singular vectors of A in U and computing right
2861: * singular vectors of A in VT
2862: * (Workspace: need BDSPAC)
2863: *
2864: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2865: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2866: $ INFO )
2867: *
2868: END IF
2869: *
2870: END IF
2871: *
2872: ELSE IF( WNTVA ) THEN
2873: *
2874: IF( WNTUN ) THEN
2875: *
2876: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2877: * N right singular vectors to be computed in VT and
2878: * no left singular vectors to be computed
2879: *
2880: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2881: *
2882: * Sufficient workspace for a fast algorithm
2883: *
2884: IR = 1
2885: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2886: *
2887: * WORK(IR) is LDA by M
2888: *
2889: LDWRKR = LDA
2890: ELSE
2891: *
2892: * WORK(IR) is M by M
2893: *
2894: LDWRKR = M
2895: END IF
2896: ITAU = IR + LDWRKR*M
2897: IWORK = ITAU + M
2898: *
2899: * Compute A=L*Q, copying result to VT
2900: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2901: *
2902: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2903: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2904: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2905: *
2906: * Copy L to WORK(IR), zeroing out above it
2907: *
2908: CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2909: $ LDWRKR )
2910: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2911: $ WORK( IR+LDWRKR ), LDWRKR )
2912: *
2913: * Generate Q in VT
2914: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2915: *
2916: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2917: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2918: IE = ITAU
2919: ITAUQ = IE + M
2920: ITAUP = ITAUQ + M
2921: IWORK = ITAUP + M
2922: *
2923: * Bidiagonalize L in WORK(IR)
2924: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2925: *
2926: CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2927: $ WORK( IE ), WORK( ITAUQ ),
2928: $ WORK( ITAUP ), WORK( IWORK ),
2929: $ LWORK-IWORK+1, IERR )
2930: *
2931: * Generate right bidiagonalizing vectors in WORK(IR)
2932: * (Workspace: need M*M+4*M-1,
2933: * prefer M*M+3*M+(M-1)*NB)
2934: *
2935: CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2936: $ WORK( ITAUP ), WORK( IWORK ),
2937: $ LWORK-IWORK+1, IERR )
2938: IWORK = IE + M
2939: *
2940: * Perform bidiagonal QR iteration, computing right
2941: * singular vectors of L in WORK(IR)
2942: * (Workspace: need M*M+BDSPAC)
2943: *
2944: CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2945: $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2946: $ WORK( IWORK ), INFO )
2947: *
2948: * Multiply right singular vectors of L in WORK(IR) by
2949: * Q in VT, storing result in A
2950: * (Workspace: need M*M)
2951: *
2952: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2953: $ LDWRKR, VT, LDVT, ZERO, A, LDA )
2954: *
2955: * Copy right singular vectors of A from A to VT
2956: *
2957: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2958: *
2959: ELSE
2960: *
2961: * Insufficient workspace for a fast algorithm
2962: *
2963: ITAU = 1
2964: IWORK = ITAU + M
2965: *
2966: * Compute A=L*Q, copying result to VT
2967: * (Workspace: need 2*M, prefer M+M*NB)
2968: *
2969: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2970: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2971: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2972: *
2973: * Generate Q in VT
2974: * (Workspace: need M+N, prefer M+N*NB)
2975: *
2976: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2977: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2978: IE = ITAU
2979: ITAUQ = IE + M
2980: ITAUP = ITAUQ + M
2981: IWORK = ITAUP + M
2982: *
2983: * Zero out above L in A
2984: *
2985: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2986: $ LDA )
2987: *
2988: * Bidiagonalize L in A
2989: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2990: *
2991: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2992: $ WORK( ITAUQ ), WORK( ITAUP ),
2993: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2994: *
2995: * Multiply right bidiagonalizing vectors in A by Q
2996: * in VT
2997: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2998: *
2999: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3000: $ WORK( ITAUP ), VT, LDVT,
3001: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3002: IWORK = IE + M
3003: *
3004: * Perform bidiagonal QR iteration, computing right
3005: * singular vectors of A in VT
3006: * (Workspace: need BDSPAC)
3007: *
3008: CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
3009: $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
3010: $ INFO )
3011: *
3012: END IF
3013: *
3014: ELSE IF( WNTUO ) THEN
3015: *
3016: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3017: * N right singular vectors to be computed in VT and
3018: * M left singular vectors to be overwritten on A
3019: *
3020: IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3021: *
3022: * Sufficient workspace for a fast algorithm
3023: *
3024: IU = 1
3025: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3026: *
3027: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3028: *
3029: LDWRKU = LDA
3030: IR = IU + LDWRKU*M
3031: LDWRKR = LDA
3032: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3033: *
3034: * WORK(IU) is LDA by M and WORK(IR) is M by M
3035: *
3036: LDWRKU = LDA
3037: IR = IU + LDWRKU*M
3038: LDWRKR = M
3039: ELSE
3040: *
3041: * WORK(IU) is M by M and WORK(IR) is M by M
3042: *
3043: LDWRKU = M
3044: IR = IU + LDWRKU*M
3045: LDWRKR = M
3046: END IF
3047: ITAU = IR + LDWRKR*M
3048: IWORK = ITAU + M
3049: *
3050: * Compute A=L*Q, copying result to VT
3051: * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3052: *
3053: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3054: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3055: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3056: *
3057: * Generate Q in VT
3058: * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3059: *
3060: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3061: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3062: *
3063: * Copy L to WORK(IU), zeroing out above it
3064: *
3065: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3066: $ LDWRKU )
3067: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3068: $ WORK( IU+LDWRKU ), LDWRKU )
3069: IE = ITAU
3070: ITAUQ = IE + M
3071: ITAUP = ITAUQ + M
3072: IWORK = ITAUP + M
3073: *
3074: * Bidiagonalize L in WORK(IU), copying result to
3075: * WORK(IR)
3076: * (Workspace: need 2*M*M+4*M,
3077: * prefer 2*M*M+3*M+2*M*NB)
3078: *
3079: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3080: $ WORK( IE ), WORK( ITAUQ ),
3081: $ WORK( ITAUP ), WORK( IWORK ),
3082: $ LWORK-IWORK+1, IERR )
3083: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3084: $ WORK( IR ), LDWRKR )
3085: *
3086: * Generate right bidiagonalizing vectors in WORK(IU)
3087: * (Workspace: need 2*M*M+4*M-1,
3088: * prefer 2*M*M+3*M+(M-1)*NB)
3089: *
3090: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3091: $ WORK( ITAUP ), WORK( IWORK ),
3092: $ LWORK-IWORK+1, IERR )
3093: *
3094: * Generate left bidiagonalizing vectors in WORK(IR)
3095: * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3096: *
3097: CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3098: $ WORK( ITAUQ ), WORK( IWORK ),
3099: $ LWORK-IWORK+1, IERR )
3100: IWORK = IE + M
3101: *
3102: * Perform bidiagonal QR iteration, computing left
3103: * singular vectors of L in WORK(IR) and computing
3104: * right singular vectors of L in WORK(IU)
3105: * (Workspace: need 2*M*M+BDSPAC)
3106: *
3107: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3108: $ WORK( IU ), LDWRKU, WORK( IR ),
3109: $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3110: *
3111: * Multiply right singular vectors of L in WORK(IU) by
3112: * Q in VT, storing result in A
3113: * (Workspace: need M*M)
3114: *
3115: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3116: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3117: *
3118: * Copy right singular vectors of A from A to VT
3119: *
3120: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3121: *
3122: * Copy left singular vectors of A from WORK(IR) to A
3123: *
3124: CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3125: $ LDA )
3126: *
3127: ELSE
3128: *
3129: * Insufficient workspace for a fast algorithm
3130: *
3131: ITAU = 1
3132: IWORK = ITAU + M
3133: *
3134: * Compute A=L*Q, copying result to VT
3135: * (Workspace: need 2*M, prefer M+M*NB)
3136: *
3137: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3138: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3139: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3140: *
3141: * Generate Q in VT
3142: * (Workspace: need M+N, prefer M+N*NB)
3143: *
3144: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3145: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3146: IE = ITAU
3147: ITAUQ = IE + M
3148: ITAUP = ITAUQ + M
3149: IWORK = ITAUP + M
3150: *
3151: * Zero out above L in A
3152: *
3153: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3154: $ LDA )
3155: *
3156: * Bidiagonalize L in A
3157: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3158: *
3159: CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3160: $ WORK( ITAUQ ), WORK( ITAUP ),
3161: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3162: *
3163: * Multiply right bidiagonalizing vectors in A by Q
3164: * in VT
3165: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3166: *
3167: CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3168: $ WORK( ITAUP ), VT, LDVT,
3169: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3170: *
3171: * Generate left bidiagonalizing vectors in A
3172: * (Workspace: need 4*M, prefer 3*M+M*NB)
3173: *
3174: CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3175: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3176: IWORK = IE + M
3177: *
3178: * Perform bidiagonal QR iteration, computing left
3179: * singular vectors of A in A and computing right
3180: * singular vectors of A in VT
3181: * (Workspace: need BDSPAC)
3182: *
3183: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3184: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3185: $ INFO )
3186: *
3187: END IF
3188: *
3189: ELSE IF( WNTUAS ) THEN
3190: *
3191: * Path 9t(N much larger than M, JOBU='S' or 'A',
3192: * JOBVT='A')
3193: * N right singular vectors to be computed in VT and
3194: * M left singular vectors to be computed in U
3195: *
3196: IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3197: *
3198: * Sufficient workspace for a fast algorithm
3199: *
3200: IU = 1
3201: IF( LWORK.GE.WRKBL+LDA*M ) THEN
3202: *
3203: * WORK(IU) is LDA by M
3204: *
3205: LDWRKU = LDA
3206: ELSE
3207: *
3208: * WORK(IU) is M by M
3209: *
3210: LDWRKU = M
3211: END IF
3212: ITAU = IU + LDWRKU*M
3213: IWORK = ITAU + M
3214: *
3215: * Compute A=L*Q, copying result to VT
3216: * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3217: *
3218: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3219: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3220: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3221: *
3222: * Generate Q in VT
3223: * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3224: *
3225: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3226: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3227: *
3228: * Copy L to WORK(IU), zeroing out above it
3229: *
3230: CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3231: $ LDWRKU )
3232: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3233: $ WORK( IU+LDWRKU ), LDWRKU )
3234: IE = ITAU
3235: ITAUQ = IE + M
3236: ITAUP = ITAUQ + M
3237: IWORK = ITAUP + M
3238: *
3239: * Bidiagonalize L in WORK(IU), copying result to U
3240: * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3241: *
3242: CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3243: $ WORK( IE ), WORK( ITAUQ ),
3244: $ WORK( ITAUP ), WORK( IWORK ),
3245: $ LWORK-IWORK+1, IERR )
3246: CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3247: $ LDU )
3248: *
3249: * Generate right bidiagonalizing vectors in WORK(IU)
3250: * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3251: *
3252: CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3253: $ WORK( ITAUP ), WORK( IWORK ),
3254: $ LWORK-IWORK+1, IERR )
3255: *
3256: * Generate left bidiagonalizing vectors in U
3257: * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3258: *
3259: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3260: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3261: IWORK = IE + M
3262: *
3263: * Perform bidiagonal QR iteration, computing left
3264: * singular vectors of L in U and computing right
3265: * singular vectors of L in WORK(IU)
3266: * (Workspace: need M*M+BDSPAC)
3267: *
3268: CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3269: $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3270: $ WORK( IWORK ), INFO )
3271: *
3272: * Multiply right singular vectors of L in WORK(IU) by
3273: * Q in VT, storing result in A
3274: * (Workspace: need M*M)
3275: *
3276: CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3277: $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3278: *
3279: * Copy right singular vectors of A from A to VT
3280: *
3281: CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3282: *
3283: ELSE
3284: *
3285: * Insufficient workspace for a fast algorithm
3286: *
3287: ITAU = 1
3288: IWORK = ITAU + M
3289: *
3290: * Compute A=L*Q, copying result to VT
3291: * (Workspace: need 2*M, prefer M+M*NB)
3292: *
3293: CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3294: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3295: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3296: *
3297: * Generate Q in VT
3298: * (Workspace: need M+N, prefer M+N*NB)
3299: *
3300: CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3301: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3302: *
3303: * Copy L to U, zeroing out above it
3304: *
3305: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3306: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3307: $ LDU )
3308: IE = ITAU
3309: ITAUQ = IE + M
3310: ITAUP = ITAUQ + M
3311: IWORK = ITAUP + M
3312: *
3313: * Bidiagonalize L in U
3314: * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3315: *
3316: CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3317: $ WORK( ITAUQ ), WORK( ITAUP ),
3318: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3319: *
3320: * Multiply right bidiagonalizing vectors in U by Q
3321: * in VT
3322: * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3323: *
3324: CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3325: $ WORK( ITAUP ), VT, LDVT,
3326: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3327: *
3328: * Generate left bidiagonalizing vectors in U
3329: * (Workspace: need 4*M, prefer 3*M+M*NB)
3330: *
3331: CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3332: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3333: IWORK = IE + M
3334: *
3335: * Perform bidiagonal QR iteration, computing left
3336: * singular vectors of A in U and computing right
3337: * singular vectors of A in VT
3338: * (Workspace: need BDSPAC)
3339: *
3340: CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3341: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3342: $ INFO )
3343: *
3344: END IF
3345: *
3346: END IF
3347: *
3348: END IF
3349: *
3350: ELSE
3351: *
3352: * N .LT. MNTHR
3353: *
3354: * Path 10t(N greater than M, but not much larger)
3355: * Reduce to bidiagonal form without LQ decomposition
3356: *
3357: IE = 1
3358: ITAUQ = IE + M
3359: ITAUP = ITAUQ + M
3360: IWORK = ITAUP + M
3361: *
3362: * Bidiagonalize A
3363: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3364: *
3365: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3366: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3367: $ IERR )
3368: IF( WNTUAS ) THEN
3369: *
3370: * If left singular vectors desired in U, copy result to U
3371: * and generate left bidiagonalizing vectors in U
3372: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3373: *
3374: CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3375: CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3376: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3377: END IF
3378: IF( WNTVAS ) THEN
3379: *
3380: * If right singular vectors desired in VT, copy result to
3381: * VT and generate right bidiagonalizing vectors in VT
3382: * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3383: *
3384: CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3385: IF( WNTVA )
3386: $ NRVT = N
3387: IF( WNTVS )
3388: $ NRVT = M
3389: CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3390: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3391: END IF
3392: IF( WNTUO ) THEN
3393: *
3394: * If left singular vectors desired in A, generate left
3395: * bidiagonalizing vectors in A
3396: * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3397: *
3398: CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3399: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3400: END IF
3401: IF( WNTVO ) THEN
3402: *
3403: * If right singular vectors desired in A, generate right
3404: * bidiagonalizing vectors in A
3405: * (Workspace: need 4*M, prefer 3*M+M*NB)
3406: *
3407: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3408: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3409: END IF
3410: IWORK = IE + M
3411: IF( WNTUAS .OR. WNTUO )
3412: $ NRU = M
3413: IF( WNTUN )
3414: $ NRU = 0
3415: IF( WNTVAS .OR. WNTVO )
3416: $ NCVT = N
3417: IF( WNTVN )
3418: $ NCVT = 0
3419: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3420: *
3421: * Perform bidiagonal QR iteration, if desired, computing
3422: * left singular vectors in U and computing right singular
3423: * vectors in VT
3424: * (Workspace: need BDSPAC)
3425: *
3426: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3427: $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3428: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3429: *
3430: * Perform bidiagonal QR iteration, if desired, computing
3431: * left singular vectors in U and computing right singular
3432: * vectors in A
3433: * (Workspace: need BDSPAC)
3434: *
3435: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3436: $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
3437: ELSE
3438: *
3439: * Perform bidiagonal QR iteration, if desired, computing
3440: * left singular vectors in A and computing right singular
3441: * vectors in VT
3442: * (Workspace: need BDSPAC)
3443: *
3444: CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3445: $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3446: END IF
3447: *
3448: END IF
3449: *
3450: END IF
3451: *
3452: * If DBDSQR failed to converge, copy unconverged superdiagonals
3453: * to WORK( 2:MINMN )
3454: *
3455: IF( INFO.NE.0 ) THEN
3456: IF( IE.GT.2 ) THEN
3457: DO 50 I = 1, MINMN - 1
3458: WORK( I+1 ) = WORK( I+IE-1 )
3459: 50 CONTINUE
3460: END IF
3461: IF( IE.LT.2 ) THEN
3462: DO 60 I = MINMN - 1, 1, -1
3463: WORK( I+1 ) = WORK( I+IE-1 )
3464: 60 CONTINUE
3465: END IF
3466: END IF
3467: *
3468: * Undo scaling if necessary
3469: *
3470: IF( ISCL.EQ.1 ) THEN
3471: IF( ANRM.GT.BIGNUM )
3472: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3473: $ IERR )
3474: IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3475: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3476: $ MINMN, IERR )
3477: IF( ANRM.LT.SMLNUM )
3478: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3479: $ IERR )
3480: IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3481: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3482: $ MINMN, IERR )
3483: END IF
3484: *
3485: * Return optimal workspace in WORK(1)
3486: *
3487: WORK( 1 ) = MAXWRK
3488: *
3489: RETURN
3490: *
3491: * End of DGESVD
3492: *
3493: END
CVSweb interface <joel.bertrand@systella.fr>