1: *> \brief <b> ZGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZGESVD + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvd.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvd.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvd.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22: * WORK, LWORK, RWORK, INFO )
23: *
24: * .. Scalar Arguments ..
25: * CHARACTER JOBU, JOBVT
26: * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27: * ..
28: * .. Array Arguments ..
29: * DOUBLE PRECISION RWORK( * ), S( * )
30: * COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31: * $ WORK( * )
32: * ..
33: *
34: *
35: *> \par Purpose:
36: * =============
37: *>
38: *> \verbatim
39: *>
40: *> ZGESVD computes the singular value decomposition (SVD) of a complex
41: *> M-by-N matrix A, optionally computing the left and/or right singular
42: *> vectors. The SVD is written
43: *>
44: *> A = U * SIGMA * conjugate-transpose(V)
45: *>
46: *> where SIGMA is an M-by-N matrix which is zero except for its
47: *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48: *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49: *> are the singular values of A; they are real and non-negative, and
50: *> are returned in descending order. The first min(m,n) columns of
51: *> U and V are the left and right singular vectors of A.
52: *>
53: *> Note that the routine returns V**H, not V.
54: *> \endverbatim
55: *
56: * Arguments:
57: * ==========
58: *
59: *> \param[in] JOBU
60: *> \verbatim
61: *> JOBU is CHARACTER*1
62: *> Specifies options for computing all or part of the matrix U:
63: *> = 'A': all M columns of U are returned in array U:
64: *> = 'S': the first min(m,n) columns of U (the left singular
65: *> vectors) are returned in the array U;
66: *> = 'O': the first min(m,n) columns of U (the left singular
67: *> vectors) are overwritten on the array A;
68: *> = 'N': no columns of U (no left singular vectors) are
69: *> computed.
70: *> \endverbatim
71: *>
72: *> \param[in] JOBVT
73: *> \verbatim
74: *> JOBVT is CHARACTER*1
75: *> Specifies options for computing all or part of the matrix
76: *> V**H:
77: *> = 'A': all N rows of V**H are returned in the array VT;
78: *> = 'S': the first min(m,n) rows of V**H (the right singular
79: *> vectors) are returned in the array VT;
80: *> = 'O': the first min(m,n) rows of V**H (the right singular
81: *> vectors) are overwritten on the array A;
82: *> = 'N': no rows of V**H (no right singular vectors) are
83: *> computed.
84: *>
85: *> JOBVT and JOBU cannot both be 'O'.
86: *> \endverbatim
87: *>
88: *> \param[in] M
89: *> \verbatim
90: *> M is INTEGER
91: *> The number of rows of the input matrix A. M >= 0.
92: *> \endverbatim
93: *>
94: *> \param[in] N
95: *> \verbatim
96: *> N is INTEGER
97: *> The number of columns of the input matrix A. N >= 0.
98: *> \endverbatim
99: *>
100: *> \param[in,out] A
101: *> \verbatim
102: *> A is COMPLEX*16 array, dimension (LDA,N)
103: *> On entry, the M-by-N matrix A.
104: *> On exit,
105: *> if JOBU = 'O', A is overwritten with the first min(m,n)
106: *> columns of U (the left singular vectors,
107: *> stored columnwise);
108: *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109: *> rows of V**H (the right singular vectors,
110: *> stored rowwise);
111: *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112: *> are destroyed.
113: *> \endverbatim
114: *>
115: *> \param[in] LDA
116: *> \verbatim
117: *> LDA is INTEGER
118: *> The leading dimension of the array A. LDA >= max(1,M).
119: *> \endverbatim
120: *>
121: *> \param[out] S
122: *> \verbatim
123: *> S is DOUBLE PRECISION array, dimension (min(M,N))
124: *> The singular values of A, sorted so that S(i) >= S(i+1).
125: *> \endverbatim
126: *>
127: *> \param[out] U
128: *> \verbatim
129: *> U is COMPLEX*16 array, dimension (LDU,UCOL)
130: *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131: *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132: *> if JOBU = 'S', U contains the first min(m,n) columns of U
133: *> (the left singular vectors, stored columnwise);
134: *> if JOBU = 'N' or 'O', U is not referenced.
135: *> \endverbatim
136: *>
137: *> \param[in] LDU
138: *> \verbatim
139: *> LDU is INTEGER
140: *> The leading dimension of the array U. LDU >= 1; if
141: *> JOBU = 'S' or 'A', LDU >= M.
142: *> \endverbatim
143: *>
144: *> \param[out] VT
145: *> \verbatim
146: *> VT is COMPLEX*16 array, dimension (LDVT,N)
147: *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148: *> V**H;
149: *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150: *> V**H (the right singular vectors, stored rowwise);
151: *> if JOBVT = 'N' or 'O', VT is not referenced.
152: *> \endverbatim
153: *>
154: *> \param[in] LDVT
155: *> \verbatim
156: *> LDVT is INTEGER
157: *> The leading dimension of the array VT. LDVT >= 1; if
158: *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159: *> \endverbatim
160: *>
161: *> \param[out] WORK
162: *> \verbatim
163: *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
164: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165: *> \endverbatim
166: *>
167: *> \param[in] LWORK
168: *> \verbatim
169: *> LWORK is INTEGER
170: *> The dimension of the array WORK.
171: *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172: *> For good performance, LWORK should generally be larger.
173: *>
174: *> If LWORK = -1, then a workspace query is assumed; the routine
175: *> only calculates the optimal size of the WORK array, returns
176: *> this value as the first entry of the WORK array, and no error
177: *> message related to LWORK is issued by XERBLA.
178: *> \endverbatim
179: *>
180: *> \param[out] RWORK
181: *> \verbatim
182: *> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
183: *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184: *> unconverged superdiagonal elements of an upper bidiagonal
185: *> matrix B whose diagonal is in S (not necessarily sorted).
186: *> B satisfies A = U * B * VT, so it has the same singular
187: *> values as A, and singular vectors related by U and VT.
188: *> \endverbatim
189: *>
190: *> \param[out] INFO
191: *> \verbatim
192: *> INFO is INTEGER
193: *> = 0: successful exit.
194: *> < 0: if INFO = -i, the i-th argument had an illegal value.
195: *> > 0: if ZBDSQR did not converge, INFO specifies how many
196: *> superdiagonals of an intermediate bidiagonal form B
197: *> did not converge to zero. See the description of RWORK
198: *> above for details.
199: *> \endverbatim
200: *
201: * Authors:
202: * ========
203: *
204: *> \author Univ. of Tennessee
205: *> \author Univ. of California Berkeley
206: *> \author Univ. of Colorado Denver
207: *> \author NAG Ltd.
208: *
209: *> \date April 2012
210: *
211: *> \ingroup complex16GEsing
212: *
213: * =====================================================================
214: SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
215: $ VT, LDVT, WORK, LWORK, RWORK, INFO )
216: *
217: * -- LAPACK driver routine (version 3.6.0) --
218: * -- LAPACK is a software package provided by Univ. of Tennessee, --
219: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220: * April 2012
221: *
222: * .. Scalar Arguments ..
223: CHARACTER JOBU, JOBVT
224: INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
225: * ..
226: * .. Array Arguments ..
227: DOUBLE PRECISION RWORK( * ), S( * )
228: COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
229: $ WORK( * )
230: * ..
231: *
232: * =====================================================================
233: *
234: * .. Parameters ..
235: COMPLEX*16 CZERO, CONE
236: PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ),
237: $ CONE = ( 1.0D0, 0.0D0 ) )
238: DOUBLE PRECISION ZERO, ONE
239: PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
240: * ..
241: * .. Local Scalars ..
242: LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
243: $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
244: INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
245: $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
246: $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
247: $ NRVT, WRKBL
248: INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M,
249: $ LWORK_ZGEBRD, LWORK_ZUNGBR_P, LWORK_ZUNGBR_Q,
250: $ LWORK_ZGELQF, LWORK_ZUNGLQ_N, LWORK_ZUNGLQ_M
251: DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
252: * ..
253: * .. Local Arrays ..
254: DOUBLE PRECISION DUM( 1 )
255: COMPLEX*16 CDUM( 1 )
256: * ..
257: * .. External Subroutines ..
258: EXTERNAL DLASCL, XERBLA, ZBDSQR, ZGEBRD, ZGELQF, ZGEMM,
259: $ ZGEQRF, ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNGLQ,
260: $ ZUNGQR, ZUNMBR
261: * ..
262: * .. External Functions ..
263: LOGICAL LSAME
264: INTEGER ILAENV
265: DOUBLE PRECISION DLAMCH, ZLANGE
266: EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE
267: * ..
268: * .. Intrinsic Functions ..
269: INTRINSIC MAX, MIN, SQRT
270: * ..
271: * .. Executable Statements ..
272: *
273: * Test the input arguments
274: *
275: INFO = 0
276: MINMN = MIN( M, N )
277: WNTUA = LSAME( JOBU, 'A' )
278: WNTUS = LSAME( JOBU, 'S' )
279: WNTUAS = WNTUA .OR. WNTUS
280: WNTUO = LSAME( JOBU, 'O' )
281: WNTUN = LSAME( JOBU, 'N' )
282: WNTVA = LSAME( JOBVT, 'A' )
283: WNTVS = LSAME( JOBVT, 'S' )
284: WNTVAS = WNTVA .OR. WNTVS
285: WNTVO = LSAME( JOBVT, 'O' )
286: WNTVN = LSAME( JOBVT, 'N' )
287: LQUERY = ( LWORK.EQ.-1 )
288: *
289: IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
290: INFO = -1
291: ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
292: $ ( WNTVO .AND. WNTUO ) ) THEN
293: INFO = -2
294: ELSE IF( M.LT.0 ) THEN
295: INFO = -3
296: ELSE IF( N.LT.0 ) THEN
297: INFO = -4
298: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
299: INFO = -6
300: ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
301: INFO = -9
302: ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
303: $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
304: INFO = -11
305: END IF
306: *
307: * Compute workspace
308: * (Note: Comments in the code beginning "Workspace:" describe the
309: * minimal amount of workspace needed at that point in the code,
310: * as well as the preferred amount for good performance.
311: * CWorkspace refers to complex workspace, and RWorkspace to
312: * real workspace. NB refers to the optimal block size for the
313: * immediately following subroutine, as returned by ILAENV.)
314: *
315: IF( INFO.EQ.0 ) THEN
316: MINWRK = 1
317: MAXWRK = 1
318: IF( M.GE.N .AND. MINMN.GT.0 ) THEN
319: *
320: * Space needed for ZBDSQR is BDSPAC = 5*N
321: *
322: MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
323: * Compute space needed for ZGEQRF
324: CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
325: LWORK_ZGEQRF=CDUM(1)
326: * Compute space needed for ZUNGQR
327: CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
328: LWORK_ZUNGQR_N=CDUM(1)
329: CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
330: LWORK_ZUNGQR_M=CDUM(1)
331: * Compute space needed for ZGEBRD
332: CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
333: $ CDUM(1), CDUM(1), -1, IERR )
334: LWORK_ZGEBRD=CDUM(1)
335: * Compute space needed for ZUNGBR
336: CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
337: $ CDUM(1), -1, IERR )
338: LWORK_ZUNGBR_P=CDUM(1)
339: CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
340: $ CDUM(1), -1, IERR )
341: LWORK_ZUNGBR_Q=CDUM(1)
342: *
343: IF( M.GE.MNTHR ) THEN
344: IF( WNTUN ) THEN
345: *
346: * Path 1 (M much larger than N, JOBU='N')
347: *
348: MAXWRK = N + LWORK_ZGEQRF
349: MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZGEBRD )
350: IF( WNTVO .OR. WNTVAS )
351: $ MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P )
352: MINWRK = 3*N
353: ELSE IF( WNTUO .AND. WNTVN ) THEN
354: *
355: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
356: *
357: WRKBL = N + LWORK_ZGEQRF
358: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
359: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
360: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
361: MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
362: MINWRK = 2*N + M
363: ELSE IF( WNTUO .AND. WNTVAS ) THEN
364: *
365: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
366: * 'A')
367: *
368: WRKBL = N + LWORK_ZGEQRF
369: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
370: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
371: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
372: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
373: MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
374: MINWRK = 2*N + M
375: ELSE IF( WNTUS .AND. WNTVN ) THEN
376: *
377: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
378: *
379: WRKBL = N + LWORK_ZGEQRF
380: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
381: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
382: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
383: MAXWRK = N*N + WRKBL
384: MINWRK = 2*N + M
385: ELSE IF( WNTUS .AND. WNTVO ) THEN
386: *
387: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
388: *
389: WRKBL = N + LWORK_ZGEQRF
390: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
391: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
392: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
393: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
394: MAXWRK = 2*N*N + WRKBL
395: MINWRK = 2*N + M
396: ELSE IF( WNTUS .AND. WNTVAS ) THEN
397: *
398: * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
399: * 'A')
400: *
401: WRKBL = N + LWORK_ZGEQRF
402: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N )
403: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
404: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
405: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
406: MAXWRK = N*N + WRKBL
407: MINWRK = 2*N + M
408: ELSE IF( WNTUA .AND. WNTVN ) THEN
409: *
410: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
411: *
412: WRKBL = N + LWORK_ZGEQRF
413: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
414: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
415: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
416: MAXWRK = N*N + WRKBL
417: MINWRK = 2*N + M
418: ELSE IF( WNTUA .AND. WNTVO ) THEN
419: *
420: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421: *
422: WRKBL = N + LWORK_ZGEQRF
423: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
424: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
425: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
426: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
427: MAXWRK = 2*N*N + WRKBL
428: MINWRK = 2*N + M
429: ELSE IF( WNTUA .AND. WNTVAS ) THEN
430: *
431: * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
432: * 'A')
433: *
434: WRKBL = N + LWORK_ZGEQRF
435: WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M )
436: WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD )
437: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q )
438: WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P )
439: MAXWRK = N*N + WRKBL
440: MINWRK = 2*N + M
441: END IF
442: ELSE
443: *
444: * Path 10 (M at least N, but not much larger)
445: *
446: CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
447: $ CDUM(1), CDUM(1), -1, IERR )
448: LWORK_ZGEBRD=CDUM(1)
449: MAXWRK = 2*N + LWORK_ZGEBRD
450: IF( WNTUS .OR. WNTUO ) THEN
451: CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
452: $ CDUM(1), -1, IERR )
453: LWORK_ZUNGBR_Q=CDUM(1)
454: MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
455: END IF
456: IF( WNTUA ) THEN
457: CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
458: $ CDUM(1), -1, IERR )
459: LWORK_ZUNGBR_Q=CDUM(1)
460: MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
461: END IF
462: IF( .NOT.WNTVN ) THEN
463: MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P )
464: MINWRK = 2*N + M
465: END IF
466: END IF
467: ELSE IF( MINMN.GT.0 ) THEN
468: *
469: * Space needed for ZBDSQR is BDSPAC = 5*M
470: *
471: MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
472: * Compute space needed for ZGELQF
473: CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
474: LWORK_ZGELQF=CDUM(1)
475: * Compute space needed for ZUNGLQ
476: CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
477: $ IERR )
478: LWORK_ZUNGLQ_N=CDUM(1)
479: CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
480: LWORK_ZUNGLQ_M=CDUM(1)
481: * Compute space needed for ZGEBRD
482: CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
483: $ CDUM(1), CDUM(1), -1, IERR )
484: LWORK_ZGEBRD=CDUM(1)
485: * Compute space needed for ZUNGBR P
486: CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1),
487: $ CDUM(1), -1, IERR )
488: LWORK_ZUNGBR_P=CDUM(1)
489: * Compute space needed for ZUNGBR Q
490: CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1),
491: $ CDUM(1), -1, IERR )
492: LWORK_ZUNGBR_Q=CDUM(1)
493: IF( N.GE.MNTHR ) THEN
494: IF( WNTVN ) THEN
495: *
496: * Path 1t(N much larger than M, JOBVT='N')
497: *
498: MAXWRK = M + LWORK_ZGELQF
499: MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZGEBRD )
500: IF( WNTUO .OR. WNTUAS )
501: $ MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q )
502: MINWRK = 3*M
503: ELSE IF( WNTVO .AND. WNTUN ) THEN
504: *
505: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
506: *
507: WRKBL = M + LWORK_ZGELQF
508: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
509: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
510: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
511: MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
512: MINWRK = 2*M + N
513: ELSE IF( WNTVO .AND. WNTUAS ) THEN
514: *
515: * Path 3t(N much larger than M, JOBU='S' or 'A',
516: * JOBVT='O')
517: *
518: WRKBL = M + LWORK_ZGELQF
519: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
520: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
521: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
522: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
523: MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
524: MINWRK = 2*M + N
525: ELSE IF( WNTVS .AND. WNTUN ) THEN
526: *
527: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
528: *
529: WRKBL = M + LWORK_ZGELQF
530: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
531: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
532: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
533: MAXWRK = M*M + WRKBL
534: MINWRK = 2*M + N
535: ELSE IF( WNTVS .AND. WNTUO ) THEN
536: *
537: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
538: *
539: WRKBL = M + LWORK_ZGELQF
540: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
541: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
542: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
543: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
544: MAXWRK = 2*M*M + WRKBL
545: MINWRK = 2*M + N
546: ELSE IF( WNTVS .AND. WNTUAS ) THEN
547: *
548: * Path 6t(N much larger than M, JOBU='S' or 'A',
549: * JOBVT='S')
550: *
551: WRKBL = M + LWORK_ZGELQF
552: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M )
553: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
554: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
555: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
556: MAXWRK = M*M + WRKBL
557: MINWRK = 2*M + N
558: ELSE IF( WNTVA .AND. WNTUN ) THEN
559: *
560: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
561: *
562: WRKBL = M + LWORK_ZGELQF
563: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
564: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
565: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
566: MAXWRK = M*M + WRKBL
567: MINWRK = 2*M + N
568: ELSE IF( WNTVA .AND. WNTUO ) THEN
569: *
570: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
571: *
572: WRKBL = M + LWORK_ZGELQF
573: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
574: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
575: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
576: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
577: MAXWRK = 2*M*M + WRKBL
578: MINWRK = 2*M + N
579: ELSE IF( WNTVA .AND. WNTUAS ) THEN
580: *
581: * Path 9t(N much larger than M, JOBU='S' or 'A',
582: * JOBVT='A')
583: *
584: WRKBL = M + LWORK_ZGELQF
585: WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N )
586: WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD )
587: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P )
588: WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q )
589: MAXWRK = M*M + WRKBL
590: MINWRK = 2*M + N
591: END IF
592: ELSE
593: *
594: * Path 10t(N greater than M, but not much larger)
595: *
596: CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
597: $ CDUM(1), CDUM(1), -1, IERR )
598: LWORK_ZGEBRD=CDUM(1)
599: MAXWRK = 2*M + LWORK_ZGEBRD
600: IF( WNTVS .OR. WNTVO ) THEN
601: * Compute space needed for ZUNGBR P
602: CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1),
603: $ CDUM(1), -1, IERR )
604: LWORK_ZUNGBR_P=CDUM(1)
605: MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
606: END IF
607: IF( WNTVA ) THEN
608: CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1),
609: $ CDUM(1), -1, IERR )
610: LWORK_ZUNGBR_P=CDUM(1)
611: MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
612: END IF
613: IF( .NOT.WNTUN ) THEN
614: MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q )
615: MINWRK = 2*M + N
616: END IF
617: END IF
618: END IF
619: MAXWRK = MAX( MAXWRK, MINWRK )
620: WORK( 1 ) = MAXWRK
621: *
622: IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
623: INFO = -13
624: END IF
625: END IF
626: *
627: IF( INFO.NE.0 ) THEN
628: CALL XERBLA( 'ZGESVD', -INFO )
629: RETURN
630: ELSE IF( LQUERY ) THEN
631: RETURN
632: END IF
633: *
634: * Quick return if possible
635: *
636: IF( M.EQ.0 .OR. N.EQ.0 ) THEN
637: RETURN
638: END IF
639: *
640: * Get machine constants
641: *
642: EPS = DLAMCH( 'P' )
643: SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
644: BIGNUM = ONE / SMLNUM
645: *
646: * Scale A if max element outside range [SMLNUM,BIGNUM]
647: *
648: ANRM = ZLANGE( 'M', M, N, A, LDA, DUM )
649: ISCL = 0
650: IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
651: ISCL = 1
652: CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
653: ELSE IF( ANRM.GT.BIGNUM ) THEN
654: ISCL = 1
655: CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
656: END IF
657: *
658: IF( M.GE.N ) THEN
659: *
660: * A has at least as many rows as columns. If A has sufficiently
661: * more rows than columns, first reduce using the QR
662: * decomposition (if sufficient workspace available)
663: *
664: IF( M.GE.MNTHR ) THEN
665: *
666: IF( WNTUN ) THEN
667: *
668: * Path 1 (M much larger than N, JOBU='N')
669: * No left singular vectors to be computed
670: *
671: ITAU = 1
672: IWORK = ITAU + N
673: *
674: * Compute A=Q*R
675: * (CWorkspace: need 2*N, prefer N+N*NB)
676: * (RWorkspace: need 0)
677: *
678: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
679: $ LWORK-IWORK+1, IERR )
680: *
681: * Zero out below R
682: *
683: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
684: $ LDA )
685: IE = 1
686: ITAUQ = 1
687: ITAUP = ITAUQ + N
688: IWORK = ITAUP + N
689: *
690: * Bidiagonalize R in A
691: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
692: * (RWorkspace: need N)
693: *
694: CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
695: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
696: $ IERR )
697: NCVT = 0
698: IF( WNTVO .OR. WNTVAS ) THEN
699: *
700: * If right singular vectors desired, generate P'.
701: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
702: * (RWorkspace: 0)
703: *
704: CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
705: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
706: NCVT = N
707: END IF
708: IRWORK = IE + N
709: *
710: * Perform bidiagonal QR iteration, computing right
711: * singular vectors of A in A if desired
712: * (CWorkspace: 0)
713: * (RWorkspace: need BDSPAC)
714: *
715: CALL ZBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
716: $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
717: *
718: * If right singular vectors desired in VT, copy them there
719: *
720: IF( WNTVAS )
721: $ CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT )
722: *
723: ELSE IF( WNTUO .AND. WNTVN ) THEN
724: *
725: * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
726: * N left singular vectors to be overwritten on A and
727: * no right singular vectors to be computed
728: *
729: IF( LWORK.GE.N*N+3*N ) THEN
730: *
731: * Sufficient workspace for a fast algorithm
732: *
733: IR = 1
734: IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
735: *
736: * WORK(IU) is LDA by N, WORK(IR) is LDA by N
737: *
738: LDWRKU = LDA
739: LDWRKR = LDA
740: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
741: *
742: * WORK(IU) is LDA by N, WORK(IR) is N by N
743: *
744: LDWRKU = LDA
745: LDWRKR = N
746: ELSE
747: *
748: * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
749: *
750: LDWRKU = ( LWORK-N*N ) / N
751: LDWRKR = N
752: END IF
753: ITAU = IR + LDWRKR*N
754: IWORK = ITAU + N
755: *
756: * Compute A=Q*R
757: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758: * (RWorkspace: 0)
759: *
760: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
761: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
762: *
763: * Copy R to WORK(IR) and zero out below it
764: *
765: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
766: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
767: $ WORK( IR+1 ), LDWRKR )
768: *
769: * Generate Q in A
770: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
771: * (RWorkspace: 0)
772: *
773: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
774: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
775: IE = 1
776: ITAUQ = ITAU
777: ITAUP = ITAUQ + N
778: IWORK = ITAUP + N
779: *
780: * Bidiagonalize R in WORK(IR)
781: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782: * (RWorkspace: need N)
783: *
784: CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
785: $ WORK( ITAUQ ), WORK( ITAUP ),
786: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
787: *
788: * Generate left vectors bidiagonalizing R
789: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
790: * (RWorkspace: need 0)
791: *
792: CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
793: $ WORK( ITAUQ ), WORK( IWORK ),
794: $ LWORK-IWORK+1, IERR )
795: IRWORK = IE + N
796: *
797: * Perform bidiagonal QR iteration, computing left
798: * singular vectors of R in WORK(IR)
799: * (CWorkspace: need N*N)
800: * (RWorkspace: need BDSPAC)
801: *
802: CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
803: $ WORK( IR ), LDWRKR, CDUM, 1,
804: $ RWORK( IRWORK ), INFO )
805: IU = ITAUQ
806: *
807: * Multiply Q in A by left singular vectors of R in
808: * WORK(IR), storing result in WORK(IU) and copying to A
809: * (CWorkspace: need N*N+N, prefer N*N+M*N)
810: * (RWorkspace: 0)
811: *
812: DO 10 I = 1, M, LDWRKU
813: CHUNK = MIN( M-I+1, LDWRKU )
814: CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
815: $ LDA, WORK( IR ), LDWRKR, CZERO,
816: $ WORK( IU ), LDWRKU )
817: CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
818: $ A( I, 1 ), LDA )
819: 10 CONTINUE
820: *
821: ELSE
822: *
823: * Insufficient workspace for a fast algorithm
824: *
825: IE = 1
826: ITAUQ = 1
827: ITAUP = ITAUQ + N
828: IWORK = ITAUP + N
829: *
830: * Bidiagonalize A
831: * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
832: * (RWorkspace: N)
833: *
834: CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
835: $ WORK( ITAUQ ), WORK( ITAUP ),
836: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
837: *
838: * Generate left vectors bidiagonalizing A
839: * (CWorkspace: need 3*N, prefer 2*N+N*NB)
840: * (RWorkspace: 0)
841: *
842: CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
843: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
844: IRWORK = IE + N
845: *
846: * Perform bidiagonal QR iteration, computing left
847: * singular vectors of A in A
848: * (CWorkspace: need 0)
849: * (RWorkspace: need BDSPAC)
850: *
851: CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
852: $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
853: *
854: END IF
855: *
856: ELSE IF( WNTUO .AND. WNTVAS ) THEN
857: *
858: * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859: * N left singular vectors to be overwritten on A and
860: * N right singular vectors to be computed in VT
861: *
862: IF( LWORK.GE.N*N+3*N ) THEN
863: *
864: * Sufficient workspace for a fast algorithm
865: *
866: IR = 1
867: IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
868: *
869: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870: *
871: LDWRKU = LDA
872: LDWRKR = LDA
873: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
874: *
875: * WORK(IU) is LDA by N and WORK(IR) is N by N
876: *
877: LDWRKU = LDA
878: LDWRKR = N
879: ELSE
880: *
881: * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882: *
883: LDWRKU = ( LWORK-N*N ) / N
884: LDWRKR = N
885: END IF
886: ITAU = IR + LDWRKR*N
887: IWORK = ITAU + N
888: *
889: * Compute A=Q*R
890: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
891: * (RWorkspace: 0)
892: *
893: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
894: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
895: *
896: * Copy R to VT, zeroing out below it
897: *
898: CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
899: IF( N.GT.1 )
900: $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
901: $ VT( 2, 1 ), LDVT )
902: *
903: * Generate Q in A
904: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
905: * (RWorkspace: 0)
906: *
907: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
908: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
909: IE = 1
910: ITAUQ = ITAU
911: ITAUP = ITAUQ + N
912: IWORK = ITAUP + N
913: *
914: * Bidiagonalize R in VT, copying result to WORK(IR)
915: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
916: * (RWorkspace: need N)
917: *
918: CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
919: $ WORK( ITAUQ ), WORK( ITAUP ),
920: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
921: CALL ZLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
922: *
923: * Generate left vectors bidiagonalizing R in WORK(IR)
924: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
925: * (RWorkspace: 0)
926: *
927: CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
928: $ WORK( ITAUQ ), WORK( IWORK ),
929: $ LWORK-IWORK+1, IERR )
930: *
931: * Generate right vectors bidiagonalizing R in VT
932: * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
933: * (RWorkspace: 0)
934: *
935: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
936: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
937: IRWORK = IE + N
938: *
939: * Perform bidiagonal QR iteration, computing left
940: * singular vectors of R in WORK(IR) and computing right
941: * singular vectors of R in VT
942: * (CWorkspace: need N*N)
943: * (RWorkspace: need BDSPAC)
944: *
945: CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
946: $ LDVT, WORK( IR ), LDWRKR, CDUM, 1,
947: $ RWORK( IRWORK ), INFO )
948: IU = ITAUQ
949: *
950: * Multiply Q in A by left singular vectors of R in
951: * WORK(IR), storing result in WORK(IU) and copying to A
952: * (CWorkspace: need N*N+N, prefer N*N+M*N)
953: * (RWorkspace: 0)
954: *
955: DO 20 I = 1, M, LDWRKU
956: CHUNK = MIN( M-I+1, LDWRKU )
957: CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
958: $ LDA, WORK( IR ), LDWRKR, CZERO,
959: $ WORK( IU ), LDWRKU )
960: CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
961: $ A( I, 1 ), LDA )
962: 20 CONTINUE
963: *
964: ELSE
965: *
966: * Insufficient workspace for a fast algorithm
967: *
968: ITAU = 1
969: IWORK = ITAU + N
970: *
971: * Compute A=Q*R
972: * (CWorkspace: need 2*N, prefer N+N*NB)
973: * (RWorkspace: 0)
974: *
975: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
976: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
977: *
978: * Copy R to VT, zeroing out below it
979: *
980: CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
981: IF( N.GT.1 )
982: $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
983: $ VT( 2, 1 ), LDVT )
984: *
985: * Generate Q in A
986: * (CWorkspace: need 2*N, prefer N+N*NB)
987: * (RWorkspace: 0)
988: *
989: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
990: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
991: IE = 1
992: ITAUQ = ITAU
993: ITAUP = ITAUQ + N
994: IWORK = ITAUP + N
995: *
996: * Bidiagonalize R in VT
997: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
998: * (RWorkspace: N)
999: *
1000: CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1001: $ WORK( ITAUQ ), WORK( ITAUP ),
1002: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1003: *
1004: * Multiply Q in A by left vectors bidiagonalizing R
1005: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1006: * (RWorkspace: 0)
1007: *
1008: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1009: $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
1010: $ LWORK-IWORK+1, IERR )
1011: *
1012: * Generate right vectors bidiagonalizing R in VT
1013: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1014: * (RWorkspace: 0)
1015: *
1016: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1017: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1018: IRWORK = IE + N
1019: *
1020: * Perform bidiagonal QR iteration, computing left
1021: * singular vectors of A in A and computing right
1022: * singular vectors of A in VT
1023: * (CWorkspace: 0)
1024: * (RWorkspace: need BDSPAC)
1025: *
1026: CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1027: $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
1028: $ INFO )
1029: *
1030: END IF
1031: *
1032: ELSE IF( WNTUS ) THEN
1033: *
1034: IF( WNTVN ) THEN
1035: *
1036: * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1037: * N left singular vectors to be computed in U and
1038: * no right singular vectors to be computed
1039: *
1040: IF( LWORK.GE.N*N+3*N ) THEN
1041: *
1042: * Sufficient workspace for a fast algorithm
1043: *
1044: IR = 1
1045: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1046: *
1047: * WORK(IR) is LDA by N
1048: *
1049: LDWRKR = LDA
1050: ELSE
1051: *
1052: * WORK(IR) is N by N
1053: *
1054: LDWRKR = N
1055: END IF
1056: ITAU = IR + LDWRKR*N
1057: IWORK = ITAU + N
1058: *
1059: * Compute A=Q*R
1060: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1061: * (RWorkspace: 0)
1062: *
1063: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1064: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1065: *
1066: * Copy R to WORK(IR), zeroing out below it
1067: *
1068: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
1069: $ LDWRKR )
1070: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1071: $ WORK( IR+1 ), LDWRKR )
1072: *
1073: * Generate Q in A
1074: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1075: * (RWorkspace: 0)
1076: *
1077: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1078: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1079: IE = 1
1080: ITAUQ = ITAU
1081: ITAUP = ITAUQ + N
1082: IWORK = ITAUP + N
1083: *
1084: * Bidiagonalize R in WORK(IR)
1085: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1086: * (RWorkspace: need N)
1087: *
1088: CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
1089: $ RWORK( IE ), WORK( ITAUQ ),
1090: $ WORK( ITAUP ), WORK( IWORK ),
1091: $ LWORK-IWORK+1, IERR )
1092: *
1093: * Generate left vectors bidiagonalizing R in WORK(IR)
1094: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1095: * (RWorkspace: 0)
1096: *
1097: CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1098: $ WORK( ITAUQ ), WORK( IWORK ),
1099: $ LWORK-IWORK+1, IERR )
1100: IRWORK = IE + N
1101: *
1102: * Perform bidiagonal QR iteration, computing left
1103: * singular vectors of R in WORK(IR)
1104: * (CWorkspace: need N*N)
1105: * (RWorkspace: need BDSPAC)
1106: *
1107: CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1108: $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1109: $ RWORK( IRWORK ), INFO )
1110: *
1111: * Multiply Q in A by left singular vectors of R in
1112: * WORK(IR), storing result in U
1113: * (CWorkspace: need N*N)
1114: * (RWorkspace: 0)
1115: *
1116: CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1117: $ WORK( IR ), LDWRKR, CZERO, U, LDU )
1118: *
1119: ELSE
1120: *
1121: * Insufficient workspace for a fast algorithm
1122: *
1123: ITAU = 1
1124: IWORK = ITAU + N
1125: *
1126: * Compute A=Q*R, copying result to U
1127: * (CWorkspace: need 2*N, prefer N+N*NB)
1128: * (RWorkspace: 0)
1129: *
1130: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1131: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1132: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1133: *
1134: * Generate Q in U
1135: * (CWorkspace: need 2*N, prefer N+N*NB)
1136: * (RWorkspace: 0)
1137: *
1138: CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1139: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1140: IE = 1
1141: ITAUQ = ITAU
1142: ITAUP = ITAUQ + N
1143: IWORK = ITAUP + N
1144: *
1145: * Zero out below R in A
1146: *
1147: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1148: $ A( 2, 1 ), LDA )
1149: *
1150: * Bidiagonalize R in A
1151: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1152: * (RWorkspace: need N)
1153: *
1154: CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1155: $ WORK( ITAUQ ), WORK( ITAUP ),
1156: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1157: *
1158: * Multiply Q in U by left vectors bidiagonalizing R
1159: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1160: * (RWorkspace: 0)
1161: *
1162: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1163: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1164: $ LWORK-IWORK+1, IERR )
1165: IRWORK = IE + N
1166: *
1167: * Perform bidiagonal QR iteration, computing left
1168: * singular vectors of A in U
1169: * (CWorkspace: 0)
1170: * (RWorkspace: need BDSPAC)
1171: *
1172: CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1173: $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1174: $ INFO )
1175: *
1176: END IF
1177: *
1178: ELSE IF( WNTVO ) THEN
1179: *
1180: * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1181: * N left singular vectors to be computed in U and
1182: * N right singular vectors to be overwritten on A
1183: *
1184: IF( LWORK.GE.2*N*N+3*N ) THEN
1185: *
1186: * Sufficient workspace for a fast algorithm
1187: *
1188: IU = 1
1189: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1190: *
1191: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1192: *
1193: LDWRKU = LDA
1194: IR = IU + LDWRKU*N
1195: LDWRKR = LDA
1196: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1197: *
1198: * WORK(IU) is LDA by N and WORK(IR) is N by N
1199: *
1200: LDWRKU = LDA
1201: IR = IU + LDWRKU*N
1202: LDWRKR = N
1203: ELSE
1204: *
1205: * WORK(IU) is N by N and WORK(IR) is N by N
1206: *
1207: LDWRKU = N
1208: IR = IU + LDWRKU*N
1209: LDWRKR = N
1210: END IF
1211: ITAU = IR + LDWRKR*N
1212: IWORK = ITAU + N
1213: *
1214: * Compute A=Q*R
1215: * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1216: * (RWorkspace: 0)
1217: *
1218: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1219: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1220: *
1221: * Copy R to WORK(IU), zeroing out below it
1222: *
1223: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1224: $ LDWRKU )
1225: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1226: $ WORK( IU+1 ), LDWRKU )
1227: *
1228: * Generate Q in A
1229: * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1230: * (RWorkspace: 0)
1231: *
1232: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1233: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1234: IE = 1
1235: ITAUQ = ITAU
1236: ITAUP = ITAUQ + N
1237: IWORK = ITAUP + N
1238: *
1239: * Bidiagonalize R in WORK(IU), copying result to
1240: * WORK(IR)
1241: * (CWorkspace: need 2*N*N+3*N,
1242: * prefer 2*N*N+2*N+2*N*NB)
1243: * (RWorkspace: need N)
1244: *
1245: CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1246: $ RWORK( IE ), WORK( ITAUQ ),
1247: $ WORK( ITAUP ), WORK( IWORK ),
1248: $ LWORK-IWORK+1, IERR )
1249: CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1250: $ WORK( IR ), LDWRKR )
1251: *
1252: * Generate left bidiagonalizing vectors in WORK(IU)
1253: * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1254: * (RWorkspace: 0)
1255: *
1256: CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1257: $ WORK( ITAUQ ), WORK( IWORK ),
1258: $ LWORK-IWORK+1, IERR )
1259: *
1260: * Generate right bidiagonalizing vectors in WORK(IR)
1261: * (CWorkspace: need 2*N*N+3*N-1,
1262: * prefer 2*N*N+2*N+(N-1)*NB)
1263: * (RWorkspace: 0)
1264: *
1265: CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1266: $ WORK( ITAUP ), WORK( IWORK ),
1267: $ LWORK-IWORK+1, IERR )
1268: IRWORK = IE + N
1269: *
1270: * Perform bidiagonal QR iteration, computing left
1271: * singular vectors of R in WORK(IU) and computing
1272: * right singular vectors of R in WORK(IR)
1273: * (CWorkspace: need 2*N*N)
1274: * (RWorkspace: need BDSPAC)
1275: *
1276: CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1277: $ WORK( IR ), LDWRKR, WORK( IU ),
1278: $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1279: $ INFO )
1280: *
1281: * Multiply Q in A by left singular vectors of R in
1282: * WORK(IU), storing result in U
1283: * (CWorkspace: need N*N)
1284: * (RWorkspace: 0)
1285: *
1286: CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1287: $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1288: *
1289: * Copy right singular vectors of R to A
1290: * (CWorkspace: need N*N)
1291: * (RWorkspace: 0)
1292: *
1293: CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1294: $ LDA )
1295: *
1296: ELSE
1297: *
1298: * Insufficient workspace for a fast algorithm
1299: *
1300: ITAU = 1
1301: IWORK = ITAU + N
1302: *
1303: * Compute A=Q*R, copying result to U
1304: * (CWorkspace: need 2*N, prefer N+N*NB)
1305: * (RWorkspace: 0)
1306: *
1307: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1308: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1309: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1310: *
1311: * Generate Q in U
1312: * (CWorkspace: need 2*N, prefer N+N*NB)
1313: * (RWorkspace: 0)
1314: *
1315: CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1316: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1317: IE = 1
1318: ITAUQ = ITAU
1319: ITAUP = ITAUQ + N
1320: IWORK = ITAUP + N
1321: *
1322: * Zero out below R in A
1323: *
1324: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1325: $ A( 2, 1 ), LDA )
1326: *
1327: * Bidiagonalize R in A
1328: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1329: * (RWorkspace: need N)
1330: *
1331: CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1332: $ WORK( ITAUQ ), WORK( ITAUP ),
1333: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1334: *
1335: * Multiply Q in U by left vectors bidiagonalizing R
1336: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1337: * (RWorkspace: 0)
1338: *
1339: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1340: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1341: $ LWORK-IWORK+1, IERR )
1342: *
1343: * Generate right vectors bidiagonalizing R in A
1344: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1345: * (RWorkspace: 0)
1346: *
1347: CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1348: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1349: IRWORK = IE + N
1350: *
1351: * Perform bidiagonal QR iteration, computing left
1352: * singular vectors of A in U and computing right
1353: * singular vectors of A in A
1354: * (CWorkspace: 0)
1355: * (RWorkspace: need BDSPAC)
1356: *
1357: CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1358: $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1359: $ INFO )
1360: *
1361: END IF
1362: *
1363: ELSE IF( WNTVAS ) THEN
1364: *
1365: * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1366: * or 'A')
1367: * N left singular vectors to be computed in U and
1368: * N right singular vectors to be computed in VT
1369: *
1370: IF( LWORK.GE.N*N+3*N ) THEN
1371: *
1372: * Sufficient workspace for a fast algorithm
1373: *
1374: IU = 1
1375: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1376: *
1377: * WORK(IU) is LDA by N
1378: *
1379: LDWRKU = LDA
1380: ELSE
1381: *
1382: * WORK(IU) is N by N
1383: *
1384: LDWRKU = N
1385: END IF
1386: ITAU = IU + LDWRKU*N
1387: IWORK = ITAU + N
1388: *
1389: * Compute A=Q*R
1390: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1391: * (RWorkspace: 0)
1392: *
1393: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1394: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1395: *
1396: * Copy R to WORK(IU), zeroing out below it
1397: *
1398: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1399: $ LDWRKU )
1400: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1401: $ WORK( IU+1 ), LDWRKU )
1402: *
1403: * Generate Q in A
1404: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1405: * (RWorkspace: 0)
1406: *
1407: CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1408: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1409: IE = 1
1410: ITAUQ = ITAU
1411: ITAUP = ITAUQ + N
1412: IWORK = ITAUP + N
1413: *
1414: * Bidiagonalize R in WORK(IU), copying result to VT
1415: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1416: * (RWorkspace: need N)
1417: *
1418: CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1419: $ RWORK( IE ), WORK( ITAUQ ),
1420: $ WORK( ITAUP ), WORK( IWORK ),
1421: $ LWORK-IWORK+1, IERR )
1422: CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1423: $ LDVT )
1424: *
1425: * Generate left bidiagonalizing vectors in WORK(IU)
1426: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1427: * (RWorkspace: 0)
1428: *
1429: CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1430: $ WORK( ITAUQ ), WORK( IWORK ),
1431: $ LWORK-IWORK+1, IERR )
1432: *
1433: * Generate right bidiagonalizing vectors in VT
1434: * (CWorkspace: need N*N+3*N-1,
1435: * prefer N*N+2*N+(N-1)*NB)
1436: * (RWorkspace: 0)
1437: *
1438: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1439: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1440: IRWORK = IE + N
1441: *
1442: * Perform bidiagonal QR iteration, computing left
1443: * singular vectors of R in WORK(IU) and computing
1444: * right singular vectors of R in VT
1445: * (CWorkspace: need N*N)
1446: * (RWorkspace: need BDSPAC)
1447: *
1448: CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1449: $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1450: $ RWORK( IRWORK ), INFO )
1451: *
1452: * Multiply Q in A by left singular vectors of R in
1453: * WORK(IU), storing result in U
1454: * (CWorkspace: need N*N)
1455: * (RWorkspace: 0)
1456: *
1457: CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1458: $ WORK( IU ), LDWRKU, CZERO, U, LDU )
1459: *
1460: ELSE
1461: *
1462: * Insufficient workspace for a fast algorithm
1463: *
1464: ITAU = 1
1465: IWORK = ITAU + N
1466: *
1467: * Compute A=Q*R, copying result to U
1468: * (CWorkspace: need 2*N, prefer N+N*NB)
1469: * (RWorkspace: 0)
1470: *
1471: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1472: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1473: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1474: *
1475: * Generate Q in U
1476: * (CWorkspace: need 2*N, prefer N+N*NB)
1477: * (RWorkspace: 0)
1478: *
1479: CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1480: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1481: *
1482: * Copy R to VT, zeroing out below it
1483: *
1484: CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
1485: IF( N.GT.1 )
1486: $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1487: $ VT( 2, 1 ), LDVT )
1488: IE = 1
1489: ITAUQ = ITAU
1490: ITAUP = ITAUQ + N
1491: IWORK = ITAUP + N
1492: *
1493: * Bidiagonalize R in VT
1494: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1495: * (RWorkspace: need N)
1496: *
1497: CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1498: $ WORK( ITAUQ ), WORK( ITAUP ),
1499: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1500: *
1501: * Multiply Q in U by left bidiagonalizing vectors
1502: * in VT
1503: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1504: * (RWorkspace: 0)
1505: *
1506: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1507: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1508: $ LWORK-IWORK+1, IERR )
1509: *
1510: * Generate right bidiagonalizing vectors in VT
1511: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1512: * (RWorkspace: 0)
1513: *
1514: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1515: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1516: IRWORK = IE + N
1517: *
1518: * Perform bidiagonal QR iteration, computing left
1519: * singular vectors of A in U and computing right
1520: * singular vectors of A in VT
1521: * (CWorkspace: 0)
1522: * (RWorkspace: need BDSPAC)
1523: *
1524: CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1525: $ LDVT, U, LDU, CDUM, 1,
1526: $ RWORK( IRWORK ), INFO )
1527: *
1528: END IF
1529: *
1530: END IF
1531: *
1532: ELSE IF( WNTUA ) THEN
1533: *
1534: IF( WNTVN ) THEN
1535: *
1536: * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1537: * M left singular vectors to be computed in U and
1538: * no right singular vectors to be computed
1539: *
1540: IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1541: *
1542: * Sufficient workspace for a fast algorithm
1543: *
1544: IR = 1
1545: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1546: *
1547: * WORK(IR) is LDA by N
1548: *
1549: LDWRKR = LDA
1550: ELSE
1551: *
1552: * WORK(IR) is N by N
1553: *
1554: LDWRKR = N
1555: END IF
1556: ITAU = IR + LDWRKR*N
1557: IWORK = ITAU + N
1558: *
1559: * Compute A=Q*R, copying result to U
1560: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1561: * (RWorkspace: 0)
1562: *
1563: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1564: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1565: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1566: *
1567: * Copy R to WORK(IR), zeroing out below it
1568: *
1569: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
1570: $ LDWRKR )
1571: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1572: $ WORK( IR+1 ), LDWRKR )
1573: *
1574: * Generate Q in U
1575: * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1576: * (RWorkspace: 0)
1577: *
1578: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1579: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1580: IE = 1
1581: ITAUQ = ITAU
1582: ITAUP = ITAUQ + N
1583: IWORK = ITAUP + N
1584: *
1585: * Bidiagonalize R in WORK(IR)
1586: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1587: * (RWorkspace: need N)
1588: *
1589: CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
1590: $ RWORK( IE ), WORK( ITAUQ ),
1591: $ WORK( ITAUP ), WORK( IWORK ),
1592: $ LWORK-IWORK+1, IERR )
1593: *
1594: * Generate left bidiagonalizing vectors in WORK(IR)
1595: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1596: * (RWorkspace: 0)
1597: *
1598: CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1599: $ WORK( ITAUQ ), WORK( IWORK ),
1600: $ LWORK-IWORK+1, IERR )
1601: IRWORK = IE + N
1602: *
1603: * Perform bidiagonal QR iteration, computing left
1604: * singular vectors of R in WORK(IR)
1605: * (CWorkspace: need N*N)
1606: * (RWorkspace: need BDSPAC)
1607: *
1608: CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1609: $ 1, WORK( IR ), LDWRKR, CDUM, 1,
1610: $ RWORK( IRWORK ), INFO )
1611: *
1612: * Multiply Q in U by left singular vectors of R in
1613: * WORK(IR), storing result in A
1614: * (CWorkspace: need N*N)
1615: * (RWorkspace: 0)
1616: *
1617: CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1618: $ WORK( IR ), LDWRKR, CZERO, A, LDA )
1619: *
1620: * Copy left singular vectors of A from A to U
1621: *
1622: CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1623: *
1624: ELSE
1625: *
1626: * Insufficient workspace for a fast algorithm
1627: *
1628: ITAU = 1
1629: IWORK = ITAU + N
1630: *
1631: * Compute A=Q*R, copying result to U
1632: * (CWorkspace: need 2*N, prefer N+N*NB)
1633: * (RWorkspace: 0)
1634: *
1635: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1636: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1637: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1638: *
1639: * Generate Q in U
1640: * (CWorkspace: need N+M, prefer N+M*NB)
1641: * (RWorkspace: 0)
1642: *
1643: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1644: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1645: IE = 1
1646: ITAUQ = ITAU
1647: ITAUP = ITAUQ + N
1648: IWORK = ITAUP + N
1649: *
1650: * Zero out below R in A
1651: *
1652: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1653: $ A( 2, 1 ), LDA )
1654: *
1655: * Bidiagonalize R in A
1656: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1657: * (RWorkspace: need N)
1658: *
1659: CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1660: $ WORK( ITAUQ ), WORK( ITAUP ),
1661: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1662: *
1663: * Multiply Q in U by left bidiagonalizing vectors
1664: * in A
1665: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1666: * (RWorkspace: 0)
1667: *
1668: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1669: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1670: $ LWORK-IWORK+1, IERR )
1671: IRWORK = IE + N
1672: *
1673: * Perform bidiagonal QR iteration, computing left
1674: * singular vectors of A in U
1675: * (CWorkspace: 0)
1676: * (RWorkspace: need BDSPAC)
1677: *
1678: CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1679: $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1680: $ INFO )
1681: *
1682: END IF
1683: *
1684: ELSE IF( WNTVO ) THEN
1685: *
1686: * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1687: * M left singular vectors to be computed in U and
1688: * N right singular vectors to be overwritten on A
1689: *
1690: IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
1691: *
1692: * Sufficient workspace for a fast algorithm
1693: *
1694: IU = 1
1695: IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1696: *
1697: * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1698: *
1699: LDWRKU = LDA
1700: IR = IU + LDWRKU*N
1701: LDWRKR = LDA
1702: ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1703: *
1704: * WORK(IU) is LDA by N and WORK(IR) is N by N
1705: *
1706: LDWRKU = LDA
1707: IR = IU + LDWRKU*N
1708: LDWRKR = N
1709: ELSE
1710: *
1711: * WORK(IU) is N by N and WORK(IR) is N by N
1712: *
1713: LDWRKU = N
1714: IR = IU + LDWRKU*N
1715: LDWRKR = N
1716: END IF
1717: ITAU = IR + LDWRKR*N
1718: IWORK = ITAU + N
1719: *
1720: * Compute A=Q*R, copying result to U
1721: * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1722: * (RWorkspace: 0)
1723: *
1724: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1725: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1726: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1727: *
1728: * Generate Q in U
1729: * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1730: * (RWorkspace: 0)
1731: *
1732: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1733: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1734: *
1735: * Copy R to WORK(IU), zeroing out below it
1736: *
1737: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1738: $ LDWRKU )
1739: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1740: $ WORK( IU+1 ), LDWRKU )
1741: IE = 1
1742: ITAUQ = ITAU
1743: ITAUP = ITAUQ + N
1744: IWORK = ITAUP + N
1745: *
1746: * Bidiagonalize R in WORK(IU), copying result to
1747: * WORK(IR)
1748: * (CWorkspace: need 2*N*N+3*N,
1749: * prefer 2*N*N+2*N+2*N*NB)
1750: * (RWorkspace: need N)
1751: *
1752: CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1753: $ RWORK( IE ), WORK( ITAUQ ),
1754: $ WORK( ITAUP ), WORK( IWORK ),
1755: $ LWORK-IWORK+1, IERR )
1756: CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1757: $ WORK( IR ), LDWRKR )
1758: *
1759: * Generate left bidiagonalizing vectors in WORK(IU)
1760: * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1761: * (RWorkspace: 0)
1762: *
1763: CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1764: $ WORK( ITAUQ ), WORK( IWORK ),
1765: $ LWORK-IWORK+1, IERR )
1766: *
1767: * Generate right bidiagonalizing vectors in WORK(IR)
1768: * (CWorkspace: need 2*N*N+3*N-1,
1769: * prefer 2*N*N+2*N+(N-1)*NB)
1770: * (RWorkspace: 0)
1771: *
1772: CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1773: $ WORK( ITAUP ), WORK( IWORK ),
1774: $ LWORK-IWORK+1, IERR )
1775: IRWORK = IE + N
1776: *
1777: * Perform bidiagonal QR iteration, computing left
1778: * singular vectors of R in WORK(IU) and computing
1779: * right singular vectors of R in WORK(IR)
1780: * (CWorkspace: need 2*N*N)
1781: * (RWorkspace: need BDSPAC)
1782: *
1783: CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1784: $ WORK( IR ), LDWRKR, WORK( IU ),
1785: $ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1786: $ INFO )
1787: *
1788: * Multiply Q in U by left singular vectors of R in
1789: * WORK(IU), storing result in A
1790: * (CWorkspace: need N*N)
1791: * (RWorkspace: 0)
1792: *
1793: CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1794: $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1795: *
1796: * Copy left singular vectors of A from A to U
1797: *
1798: CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1799: *
1800: * Copy right singular vectors of R from WORK(IR) to A
1801: *
1802: CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1803: $ LDA )
1804: *
1805: ELSE
1806: *
1807: * Insufficient workspace for a fast algorithm
1808: *
1809: ITAU = 1
1810: IWORK = ITAU + N
1811: *
1812: * Compute A=Q*R, copying result to U
1813: * (CWorkspace: need 2*N, prefer N+N*NB)
1814: * (RWorkspace: 0)
1815: *
1816: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1817: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1818: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1819: *
1820: * Generate Q in U
1821: * (CWorkspace: need N+M, prefer N+M*NB)
1822: * (RWorkspace: 0)
1823: *
1824: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1825: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1826: IE = 1
1827: ITAUQ = ITAU
1828: ITAUP = ITAUQ + N
1829: IWORK = ITAUP + N
1830: *
1831: * Zero out below R in A
1832: *
1833: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1834: $ A( 2, 1 ), LDA )
1835: *
1836: * Bidiagonalize R in A
1837: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1838: * (RWorkspace: need N)
1839: *
1840: CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
1841: $ WORK( ITAUQ ), WORK( ITAUP ),
1842: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1843: *
1844: * Multiply Q in U by left bidiagonalizing vectors
1845: * in A
1846: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1847: * (RWorkspace: 0)
1848: *
1849: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1850: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1851: $ LWORK-IWORK+1, IERR )
1852: *
1853: * Generate right bidiagonalizing vectors in A
1854: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1855: * (RWorkspace: 0)
1856: *
1857: CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1858: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1859: IRWORK = IE + N
1860: *
1861: * Perform bidiagonal QR iteration, computing left
1862: * singular vectors of A in U and computing right
1863: * singular vectors of A in A
1864: * (CWorkspace: 0)
1865: * (RWorkspace: need BDSPAC)
1866: *
1867: CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1868: $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1869: $ INFO )
1870: *
1871: END IF
1872: *
1873: ELSE IF( WNTVAS ) THEN
1874: *
1875: * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1876: * or 'A')
1877: * M left singular vectors to be computed in U and
1878: * N right singular vectors to be computed in VT
1879: *
1880: IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1881: *
1882: * Sufficient workspace for a fast algorithm
1883: *
1884: IU = 1
1885: IF( LWORK.GE.WRKBL+LDA*N ) THEN
1886: *
1887: * WORK(IU) is LDA by N
1888: *
1889: LDWRKU = LDA
1890: ELSE
1891: *
1892: * WORK(IU) is N by N
1893: *
1894: LDWRKU = N
1895: END IF
1896: ITAU = IU + LDWRKU*N
1897: IWORK = ITAU + N
1898: *
1899: * Compute A=Q*R, copying result to U
1900: * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1901: * (RWorkspace: 0)
1902: *
1903: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1904: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1905: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1906: *
1907: * Generate Q in U
1908: * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1909: * (RWorkspace: 0)
1910: *
1911: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1912: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1913: *
1914: * Copy R to WORK(IU), zeroing out below it
1915: *
1916: CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
1917: $ LDWRKU )
1918: CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
1919: $ WORK( IU+1 ), LDWRKU )
1920: IE = 1
1921: ITAUQ = ITAU
1922: ITAUP = ITAUQ + N
1923: IWORK = ITAUP + N
1924: *
1925: * Bidiagonalize R in WORK(IU), copying result to VT
1926: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1927: * (RWorkspace: need N)
1928: *
1929: CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
1930: $ RWORK( IE ), WORK( ITAUQ ),
1931: $ WORK( ITAUP ), WORK( IWORK ),
1932: $ LWORK-IWORK+1, IERR )
1933: CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1934: $ LDVT )
1935: *
1936: * Generate left bidiagonalizing vectors in WORK(IU)
1937: * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1938: * (RWorkspace: 0)
1939: *
1940: CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1941: $ WORK( ITAUQ ), WORK( IWORK ),
1942: $ LWORK-IWORK+1, IERR )
1943: *
1944: * Generate right bidiagonalizing vectors in VT
1945: * (CWorkspace: need N*N+3*N-1,
1946: * prefer N*N+2*N+(N-1)*NB)
1947: * (RWorkspace: need 0)
1948: *
1949: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1950: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1951: IRWORK = IE + N
1952: *
1953: * Perform bidiagonal QR iteration, computing left
1954: * singular vectors of R in WORK(IU) and computing
1955: * right singular vectors of R in VT
1956: * (CWorkspace: need N*N)
1957: * (RWorkspace: need BDSPAC)
1958: *
1959: CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1960: $ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1961: $ RWORK( IRWORK ), INFO )
1962: *
1963: * Multiply Q in U by left singular vectors of R in
1964: * WORK(IU), storing result in A
1965: * (CWorkspace: need N*N)
1966: * (RWorkspace: 0)
1967: *
1968: CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1969: $ WORK( IU ), LDWRKU, CZERO, A, LDA )
1970: *
1971: * Copy left singular vectors of A from A to U
1972: *
1973: CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
1974: *
1975: ELSE
1976: *
1977: * Insufficient workspace for a fast algorithm
1978: *
1979: ITAU = 1
1980: IWORK = ITAU + N
1981: *
1982: * Compute A=Q*R, copying result to U
1983: * (CWorkspace: need 2*N, prefer N+N*NB)
1984: * (RWorkspace: 0)
1985: *
1986: CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
1987: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1988: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
1989: *
1990: * Generate Q in U
1991: * (CWorkspace: need N+M, prefer N+M*NB)
1992: * (RWorkspace: 0)
1993: *
1994: CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1995: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1996: *
1997: * Copy R from A to VT, zeroing out below it
1998: *
1999: CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
2000: IF( N.GT.1 )
2001: $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
2002: $ VT( 2, 1 ), LDVT )
2003: IE = 1
2004: ITAUQ = ITAU
2005: ITAUP = ITAUQ + N
2006: IWORK = ITAUP + N
2007: *
2008: * Bidiagonalize R in VT
2009: * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2010: * (RWorkspace: need N)
2011: *
2012: CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
2013: $ WORK( ITAUQ ), WORK( ITAUP ),
2014: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2015: *
2016: * Multiply Q in U by left bidiagonalizing vectors
2017: * in VT
2018: * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2019: * (RWorkspace: 0)
2020: *
2021: CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
2022: $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
2023: $ LWORK-IWORK+1, IERR )
2024: *
2025: * Generate right bidiagonalizing vectors in VT
2026: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2027: * (RWorkspace: 0)
2028: *
2029: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2030: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2031: IRWORK = IE + N
2032: *
2033: * Perform bidiagonal QR iteration, computing left
2034: * singular vectors of A in U and computing right
2035: * singular vectors of A in VT
2036: * (CWorkspace: 0)
2037: * (RWorkspace: need BDSPAC)
2038: *
2039: CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
2040: $ LDVT, U, LDU, CDUM, 1,
2041: $ RWORK( IRWORK ), INFO )
2042: *
2043: END IF
2044: *
2045: END IF
2046: *
2047: END IF
2048: *
2049: ELSE
2050: *
2051: * M .LT. MNTHR
2052: *
2053: * Path 10 (M at least N, but not much larger)
2054: * Reduce to bidiagonal form without QR decomposition
2055: *
2056: IE = 1
2057: ITAUQ = 1
2058: ITAUP = ITAUQ + N
2059: IWORK = ITAUP + N
2060: *
2061: * Bidiagonalize A
2062: * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2063: * (RWorkspace: need N)
2064: *
2065: CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2066: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2067: $ IERR )
2068: IF( WNTUAS ) THEN
2069: *
2070: * If left singular vectors desired in U, copy result to U
2071: * and generate left bidiagonalizing vectors in U
2072: * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2073: * (RWorkspace: 0)
2074: *
2075: CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
2076: IF( WNTUS )
2077: $ NCU = N
2078: IF( WNTUA )
2079: $ NCU = M
2080: CALL ZUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
2081: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2082: END IF
2083: IF( WNTVAS ) THEN
2084: *
2085: * If right singular vectors desired in VT, copy result to
2086: * VT and generate right bidiagonalizing vectors in VT
2087: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2088: * (RWorkspace: 0)
2089: *
2090: CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
2091: CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2092: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2093: END IF
2094: IF( WNTUO ) THEN
2095: *
2096: * If left singular vectors desired in A, generate left
2097: * bidiagonalizing vectors in A
2098: * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2099: * (RWorkspace: 0)
2100: *
2101: CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2102: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2103: END IF
2104: IF( WNTVO ) THEN
2105: *
2106: * If right singular vectors desired in A, generate right
2107: * bidiagonalizing vectors in A
2108: * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2109: * (RWorkspace: 0)
2110: *
2111: CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2112: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2113: END IF
2114: IRWORK = IE + N
2115: IF( WNTUAS .OR. WNTUO )
2116: $ NRU = M
2117: IF( WNTUN )
2118: $ NRU = 0
2119: IF( WNTVAS .OR. WNTVO )
2120: $ NCVT = N
2121: IF( WNTVN )
2122: $ NCVT = 0
2123: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2124: *
2125: * Perform bidiagonal QR iteration, if desired, computing
2126: * left singular vectors in U and computing right singular
2127: * vectors in VT
2128: * (CWorkspace: 0)
2129: * (RWorkspace: need BDSPAC)
2130: *
2131: CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2132: $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
2133: $ INFO )
2134: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2135: *
2136: * Perform bidiagonal QR iteration, if desired, computing
2137: * left singular vectors in U and computing right singular
2138: * vectors in A
2139: * (CWorkspace: 0)
2140: * (RWorkspace: need BDSPAC)
2141: *
2142: CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
2143: $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
2144: $ INFO )
2145: ELSE
2146: *
2147: * Perform bidiagonal QR iteration, if desired, computing
2148: * left singular vectors in A and computing right singular
2149: * vectors in VT
2150: * (CWorkspace: 0)
2151: * (RWorkspace: need BDSPAC)
2152: *
2153: CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2154: $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
2155: $ INFO )
2156: END IF
2157: *
2158: END IF
2159: *
2160: ELSE
2161: *
2162: * A has more columns than rows. If A has sufficiently more
2163: * columns than rows, first reduce using the LQ decomposition (if
2164: * sufficient workspace available)
2165: *
2166: IF( N.GE.MNTHR ) THEN
2167: *
2168: IF( WNTVN ) THEN
2169: *
2170: * Path 1t(N much larger than M, JOBVT='N')
2171: * No right singular vectors to be computed
2172: *
2173: ITAU = 1
2174: IWORK = ITAU + M
2175: *
2176: * Compute A=L*Q
2177: * (CWorkspace: need 2*M, prefer M+M*NB)
2178: * (RWorkspace: 0)
2179: *
2180: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2181: $ LWORK-IWORK+1, IERR )
2182: *
2183: * Zero out above L
2184: *
2185: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
2186: $ LDA )
2187: IE = 1
2188: ITAUQ = 1
2189: ITAUP = ITAUQ + M
2190: IWORK = ITAUP + M
2191: *
2192: * Bidiagonalize L in A
2193: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2194: * (RWorkspace: need M)
2195: *
2196: CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2197: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2198: $ IERR )
2199: IF( WNTUO .OR. WNTUAS ) THEN
2200: *
2201: * If left singular vectors desired, generate Q
2202: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2203: * (RWorkspace: 0)
2204: *
2205: CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2206: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2207: END IF
2208: IRWORK = IE + M
2209: NRU = 0
2210: IF( WNTUO .OR. WNTUAS )
2211: $ NRU = M
2212: *
2213: * Perform bidiagonal QR iteration, computing left singular
2214: * vectors of A in A if desired
2215: * (CWorkspace: 0)
2216: * (RWorkspace: need BDSPAC)
2217: *
2218: CALL ZBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
2219: $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
2220: *
2221: * If left singular vectors desired in U, copy them there
2222: *
2223: IF( WNTUAS )
2224: $ CALL ZLACPY( 'F', M, M, A, LDA, U, LDU )
2225: *
2226: ELSE IF( WNTVO .AND. WNTUN ) THEN
2227: *
2228: * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2229: * M right singular vectors to be overwritten on A and
2230: * no left singular vectors to be computed
2231: *
2232: IF( LWORK.GE.M*M+3*M ) THEN
2233: *
2234: * Sufficient workspace for a fast algorithm
2235: *
2236: IR = 1
2237: IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2238: *
2239: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2240: *
2241: LDWRKU = LDA
2242: CHUNK = N
2243: LDWRKR = LDA
2244: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2245: *
2246: * WORK(IU) is LDA by N and WORK(IR) is M by M
2247: *
2248: LDWRKU = LDA
2249: CHUNK = N
2250: LDWRKR = M
2251: ELSE
2252: *
2253: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2254: *
2255: LDWRKU = M
2256: CHUNK = ( LWORK-M*M ) / M
2257: LDWRKR = M
2258: END IF
2259: ITAU = IR + LDWRKR*M
2260: IWORK = ITAU + M
2261: *
2262: * Compute A=L*Q
2263: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2264: * (RWorkspace: 0)
2265: *
2266: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2267: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2268: *
2269: * Copy L to WORK(IR) and zero out above it
2270: *
2271: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2272: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2273: $ WORK( IR+LDWRKR ), LDWRKR )
2274: *
2275: * Generate Q in A
2276: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2277: * (RWorkspace: 0)
2278: *
2279: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2280: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2281: IE = 1
2282: ITAUQ = ITAU
2283: ITAUP = ITAUQ + M
2284: IWORK = ITAUP + M
2285: *
2286: * Bidiagonalize L in WORK(IR)
2287: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2288: * (RWorkspace: need M)
2289: *
2290: CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
2291: $ WORK( ITAUQ ), WORK( ITAUP ),
2292: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2293: *
2294: * Generate right vectors bidiagonalizing L
2295: * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2296: * (RWorkspace: 0)
2297: *
2298: CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2299: $ WORK( ITAUP ), WORK( IWORK ),
2300: $ LWORK-IWORK+1, IERR )
2301: IRWORK = IE + M
2302: *
2303: * Perform bidiagonal QR iteration, computing right
2304: * singular vectors of L in WORK(IR)
2305: * (CWorkspace: need M*M)
2306: * (RWorkspace: need BDSPAC)
2307: *
2308: CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2309: $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2310: $ RWORK( IRWORK ), INFO )
2311: IU = ITAUQ
2312: *
2313: * Multiply right singular vectors of L in WORK(IR) by Q
2314: * in A, storing result in WORK(IU) and copying to A
2315: * (CWorkspace: need M*M+M, prefer M*M+M*N)
2316: * (RWorkspace: 0)
2317: *
2318: DO 30 I = 1, N, CHUNK
2319: BLK = MIN( N-I+1, CHUNK )
2320: CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2321: $ LDWRKR, A( 1, I ), LDA, CZERO,
2322: $ WORK( IU ), LDWRKU )
2323: CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2324: $ A( 1, I ), LDA )
2325: 30 CONTINUE
2326: *
2327: ELSE
2328: *
2329: * Insufficient workspace for a fast algorithm
2330: *
2331: IE = 1
2332: ITAUQ = 1
2333: ITAUP = ITAUQ + M
2334: IWORK = ITAUP + M
2335: *
2336: * Bidiagonalize A
2337: * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2338: * (RWorkspace: need M)
2339: *
2340: CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
2341: $ WORK( ITAUQ ), WORK( ITAUP ),
2342: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2343: *
2344: * Generate right vectors bidiagonalizing A
2345: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2346: * (RWorkspace: 0)
2347: *
2348: CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2349: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2350: IRWORK = IE + M
2351: *
2352: * Perform bidiagonal QR iteration, computing right
2353: * singular vectors of A in A
2354: * (CWorkspace: 0)
2355: * (RWorkspace: need BDSPAC)
2356: *
2357: CALL ZBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
2358: $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
2359: *
2360: END IF
2361: *
2362: ELSE IF( WNTVO .AND. WNTUAS ) THEN
2363: *
2364: * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2365: * M right singular vectors to be overwritten on A and
2366: * M left singular vectors to be computed in U
2367: *
2368: IF( LWORK.GE.M*M+3*M ) THEN
2369: *
2370: * Sufficient workspace for a fast algorithm
2371: *
2372: IR = 1
2373: IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2374: *
2375: * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2376: *
2377: LDWRKU = LDA
2378: CHUNK = N
2379: LDWRKR = LDA
2380: ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2381: *
2382: * WORK(IU) is LDA by N and WORK(IR) is M by M
2383: *
2384: LDWRKU = LDA
2385: CHUNK = N
2386: LDWRKR = M
2387: ELSE
2388: *
2389: * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2390: *
2391: LDWRKU = M
2392: CHUNK = ( LWORK-M*M ) / M
2393: LDWRKR = M
2394: END IF
2395: ITAU = IR + LDWRKR*M
2396: IWORK = ITAU + M
2397: *
2398: * Compute A=L*Q
2399: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2400: * (RWorkspace: 0)
2401: *
2402: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2403: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2404: *
2405: * Copy L to U, zeroing about above it
2406: *
2407: CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
2408: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2409: $ LDU )
2410: *
2411: * Generate Q in A
2412: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2413: * (RWorkspace: 0)
2414: *
2415: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2416: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2417: IE = 1
2418: ITAUQ = ITAU
2419: ITAUP = ITAUQ + M
2420: IWORK = ITAUP + M
2421: *
2422: * Bidiagonalize L in U, copying result to WORK(IR)
2423: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2424: * (RWorkspace: need M)
2425: *
2426: CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
2427: $ WORK( ITAUQ ), WORK( ITAUP ),
2428: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2429: CALL ZLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2430: *
2431: * Generate right vectors bidiagonalizing L in WORK(IR)
2432: * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2433: * (RWorkspace: 0)
2434: *
2435: CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2436: $ WORK( ITAUP ), WORK( IWORK ),
2437: $ LWORK-IWORK+1, IERR )
2438: *
2439: * Generate left vectors bidiagonalizing L in U
2440: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2441: * (RWorkspace: 0)
2442: *
2443: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2444: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2445: IRWORK = IE + M
2446: *
2447: * Perform bidiagonal QR iteration, computing left
2448: * singular vectors of L in U, and computing right
2449: * singular vectors of L in WORK(IR)
2450: * (CWorkspace: need M*M)
2451: * (RWorkspace: need BDSPAC)
2452: *
2453: CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2454: $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
2455: $ RWORK( IRWORK ), INFO )
2456: IU = ITAUQ
2457: *
2458: * Multiply right singular vectors of L in WORK(IR) by Q
2459: * in A, storing result in WORK(IU) and copying to A
2460: * (CWorkspace: need M*M+M, prefer M*M+M*N))
2461: * (RWorkspace: 0)
2462: *
2463: DO 40 I = 1, N, CHUNK
2464: BLK = MIN( N-I+1, CHUNK )
2465: CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2466: $ LDWRKR, A( 1, I ), LDA, CZERO,
2467: $ WORK( IU ), LDWRKU )
2468: CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2469: $ A( 1, I ), LDA )
2470: 40 CONTINUE
2471: *
2472: ELSE
2473: *
2474: * Insufficient workspace for a fast algorithm
2475: *
2476: ITAU = 1
2477: IWORK = ITAU + M
2478: *
2479: * Compute A=L*Q
2480: * (CWorkspace: need 2*M, prefer M+M*NB)
2481: * (RWorkspace: 0)
2482: *
2483: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2484: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2485: *
2486: * Copy L to U, zeroing out above it
2487: *
2488: CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
2489: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2490: $ LDU )
2491: *
2492: * Generate Q in A
2493: * (CWorkspace: need 2*M, prefer M+M*NB)
2494: * (RWorkspace: 0)
2495: *
2496: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2497: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2498: IE = 1
2499: ITAUQ = ITAU
2500: ITAUP = ITAUQ + M
2501: IWORK = ITAUP + M
2502: *
2503: * Bidiagonalize L in U
2504: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2505: * (RWorkspace: need M)
2506: *
2507: CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
2508: $ WORK( ITAUQ ), WORK( ITAUP ),
2509: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2510: *
2511: * Multiply right vectors bidiagonalizing L by Q in A
2512: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2513: * (RWorkspace: 0)
2514: *
2515: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2516: $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2517: $ LWORK-IWORK+1, IERR )
2518: *
2519: * Generate left vectors bidiagonalizing L in U
2520: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2521: * (RWorkspace: 0)
2522: *
2523: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2524: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2525: IRWORK = IE + M
2526: *
2527: * Perform bidiagonal QR iteration, computing left
2528: * singular vectors of A in U and computing right
2529: * singular vectors of A in A
2530: * (CWorkspace: 0)
2531: * (RWorkspace: need BDSPAC)
2532: *
2533: CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
2534: $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
2535: *
2536: END IF
2537: *
2538: ELSE IF( WNTVS ) THEN
2539: *
2540: IF( WNTUN ) THEN
2541: *
2542: * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2543: * M right singular vectors to be computed in VT and
2544: * no left singular vectors to be computed
2545: *
2546: IF( LWORK.GE.M*M+3*M ) THEN
2547: *
2548: * Sufficient workspace for a fast algorithm
2549: *
2550: IR = 1
2551: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2552: *
2553: * WORK(IR) is LDA by M
2554: *
2555: LDWRKR = LDA
2556: ELSE
2557: *
2558: * WORK(IR) is M by M
2559: *
2560: LDWRKR = M
2561: END IF
2562: ITAU = IR + LDWRKR*M
2563: IWORK = ITAU + M
2564: *
2565: * Compute A=L*Q
2566: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2567: * (RWorkspace: 0)
2568: *
2569: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2570: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2571: *
2572: * Copy L to WORK(IR), zeroing out above it
2573: *
2574: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
2575: $ LDWRKR )
2576: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2577: $ WORK( IR+LDWRKR ), LDWRKR )
2578: *
2579: * Generate Q in A
2580: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2581: * (RWorkspace: 0)
2582: *
2583: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2584: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2585: IE = 1
2586: ITAUQ = ITAU
2587: ITAUP = ITAUQ + M
2588: IWORK = ITAUP + M
2589: *
2590: * Bidiagonalize L in WORK(IR)
2591: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2592: * (RWorkspace: need M)
2593: *
2594: CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
2595: $ RWORK( IE ), WORK( ITAUQ ),
2596: $ WORK( ITAUP ), WORK( IWORK ),
2597: $ LWORK-IWORK+1, IERR )
2598: *
2599: * Generate right vectors bidiagonalizing L in
2600: * WORK(IR)
2601: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2602: * (RWorkspace: 0)
2603: *
2604: CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2605: $ WORK( ITAUP ), WORK( IWORK ),
2606: $ LWORK-IWORK+1, IERR )
2607: IRWORK = IE + M
2608: *
2609: * Perform bidiagonal QR iteration, computing right
2610: * singular vectors of L in WORK(IR)
2611: * (CWorkspace: need M*M)
2612: * (RWorkspace: need BDSPAC)
2613: *
2614: CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2615: $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2616: $ RWORK( IRWORK ), INFO )
2617: *
2618: * Multiply right singular vectors of L in WORK(IR) by
2619: * Q in A, storing result in VT
2620: * (CWorkspace: need M*M)
2621: * (RWorkspace: 0)
2622: *
2623: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
2624: $ LDWRKR, A, LDA, CZERO, VT, LDVT )
2625: *
2626: ELSE
2627: *
2628: * Insufficient workspace for a fast algorithm
2629: *
2630: ITAU = 1
2631: IWORK = ITAU + M
2632: *
2633: * Compute A=L*Q
2634: * (CWorkspace: need 2*M, prefer M+M*NB)
2635: * (RWorkspace: 0)
2636: *
2637: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2638: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2639: *
2640: * Copy result to VT
2641: *
2642: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2643: *
2644: * Generate Q in VT
2645: * (CWorkspace: need 2*M, prefer M+M*NB)
2646: * (RWorkspace: 0)
2647: *
2648: CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2649: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2650: IE = 1
2651: ITAUQ = ITAU
2652: ITAUP = ITAUQ + M
2653: IWORK = ITAUP + M
2654: *
2655: * Zero out above L in A
2656: *
2657: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2658: $ A( 1, 2 ), LDA )
2659: *
2660: * Bidiagonalize L in A
2661: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2662: * (RWorkspace: need M)
2663: *
2664: CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
2665: $ WORK( ITAUQ ), WORK( ITAUP ),
2666: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2667: *
2668: * Multiply right vectors bidiagonalizing L by Q in VT
2669: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2670: * (RWorkspace: 0)
2671: *
2672: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2673: $ WORK( ITAUP ), VT, LDVT,
2674: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2675: IRWORK = IE + M
2676: *
2677: * Perform bidiagonal QR iteration, computing right
2678: * singular vectors of A in VT
2679: * (CWorkspace: 0)
2680: * (RWorkspace: need BDSPAC)
2681: *
2682: CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
2683: $ LDVT, CDUM, 1, CDUM, 1,
2684: $ RWORK( IRWORK ), INFO )
2685: *
2686: END IF
2687: *
2688: ELSE IF( WNTUO ) THEN
2689: *
2690: * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2691: * M right singular vectors to be computed in VT and
2692: * M left singular vectors to be overwritten on A
2693: *
2694: IF( LWORK.GE.2*M*M+3*M ) THEN
2695: *
2696: * Sufficient workspace for a fast algorithm
2697: *
2698: IU = 1
2699: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2700: *
2701: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2702: *
2703: LDWRKU = LDA
2704: IR = IU + LDWRKU*M
2705: LDWRKR = LDA
2706: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2707: *
2708: * WORK(IU) is LDA by M and WORK(IR) is M by M
2709: *
2710: LDWRKU = LDA
2711: IR = IU + LDWRKU*M
2712: LDWRKR = M
2713: ELSE
2714: *
2715: * WORK(IU) is M by M and WORK(IR) is M by M
2716: *
2717: LDWRKU = M
2718: IR = IU + LDWRKU*M
2719: LDWRKR = M
2720: END IF
2721: ITAU = IR + LDWRKR*M
2722: IWORK = ITAU + M
2723: *
2724: * Compute A=L*Q
2725: * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2726: * (RWorkspace: 0)
2727: *
2728: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2729: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2730: *
2731: * Copy L to WORK(IU), zeroing out below it
2732: *
2733: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
2734: $ LDWRKU )
2735: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2736: $ WORK( IU+LDWRKU ), LDWRKU )
2737: *
2738: * Generate Q in A
2739: * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2740: * (RWorkspace: 0)
2741: *
2742: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2743: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2744: IE = 1
2745: ITAUQ = ITAU
2746: ITAUP = ITAUQ + M
2747: IWORK = ITAUP + M
2748: *
2749: * Bidiagonalize L in WORK(IU), copying result to
2750: * WORK(IR)
2751: * (CWorkspace: need 2*M*M+3*M,
2752: * prefer 2*M*M+2*M+2*M*NB)
2753: * (RWorkspace: need M)
2754: *
2755: CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
2756: $ RWORK( IE ), WORK( ITAUQ ),
2757: $ WORK( ITAUP ), WORK( IWORK ),
2758: $ LWORK-IWORK+1, IERR )
2759: CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2760: $ WORK( IR ), LDWRKR )
2761: *
2762: * Generate right bidiagonalizing vectors in WORK(IU)
2763: * (CWorkspace: need 2*M*M+3*M-1,
2764: * prefer 2*M*M+2*M+(M-1)*NB)
2765: * (RWorkspace: 0)
2766: *
2767: CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2768: $ WORK( ITAUP ), WORK( IWORK ),
2769: $ LWORK-IWORK+1, IERR )
2770: *
2771: * Generate left bidiagonalizing vectors in WORK(IR)
2772: * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2773: * (RWorkspace: 0)
2774: *
2775: CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2776: $ WORK( ITAUQ ), WORK( IWORK ),
2777: $ LWORK-IWORK+1, IERR )
2778: IRWORK = IE + M
2779: *
2780: * Perform bidiagonal QR iteration, computing left
2781: * singular vectors of L in WORK(IR) and computing
2782: * right singular vectors of L in WORK(IU)
2783: * (CWorkspace: need 2*M*M)
2784: * (RWorkspace: need BDSPAC)
2785: *
2786: CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2787: $ WORK( IU ), LDWRKU, WORK( IR ),
2788: $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
2789: $ INFO )
2790: *
2791: * Multiply right singular vectors of L in WORK(IU) by
2792: * Q in A, storing result in VT
2793: * (CWorkspace: need M*M)
2794: * (RWorkspace: 0)
2795: *
2796: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2797: $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2798: *
2799: * Copy left singular vectors of L to A
2800: * (CWorkspace: need M*M)
2801: * (RWorkspace: 0)
2802: *
2803: CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2804: $ LDA )
2805: *
2806: ELSE
2807: *
2808: * Insufficient workspace for a fast algorithm
2809: *
2810: ITAU = 1
2811: IWORK = ITAU + M
2812: *
2813: * Compute A=L*Q, copying result to VT
2814: * (CWorkspace: need 2*M, prefer M+M*NB)
2815: * (RWorkspace: 0)
2816: *
2817: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2818: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2819: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2820: *
2821: * Generate Q in VT
2822: * (CWorkspace: need 2*M, prefer M+M*NB)
2823: * (RWorkspace: 0)
2824: *
2825: CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2826: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2827: IE = 1
2828: ITAUQ = ITAU
2829: ITAUP = ITAUQ + M
2830: IWORK = ITAUP + M
2831: *
2832: * Zero out above L in A
2833: *
2834: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2835: $ A( 1, 2 ), LDA )
2836: *
2837: * Bidiagonalize L in A
2838: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2839: * (RWorkspace: need M)
2840: *
2841: CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
2842: $ WORK( ITAUQ ), WORK( ITAUP ),
2843: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2844: *
2845: * Multiply right vectors bidiagonalizing L by Q in VT
2846: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2847: * (RWorkspace: 0)
2848: *
2849: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2850: $ WORK( ITAUP ), VT, LDVT,
2851: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2852: *
2853: * Generate left bidiagonalizing vectors of L in A
2854: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2855: * (RWorkspace: 0)
2856: *
2857: CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2858: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2859: IRWORK = IE + M
2860: *
2861: * Perform bidiagonal QR iteration, computing left
2862: * singular vectors of A in A and computing right
2863: * singular vectors of A in VT
2864: * (CWorkspace: 0)
2865: * (RWorkspace: need BDSPAC)
2866: *
2867: CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2868: $ LDVT, A, LDA, CDUM, 1,
2869: $ RWORK( IRWORK ), INFO )
2870: *
2871: END IF
2872: *
2873: ELSE IF( WNTUAS ) THEN
2874: *
2875: * Path 6t(N much larger than M, JOBU='S' or 'A',
2876: * JOBVT='S')
2877: * M right singular vectors to be computed in VT and
2878: * M left singular vectors to be computed in U
2879: *
2880: IF( LWORK.GE.M*M+3*M ) THEN
2881: *
2882: * Sufficient workspace for a fast algorithm
2883: *
2884: IU = 1
2885: IF( LWORK.GE.WRKBL+LDA*M ) THEN
2886: *
2887: * WORK(IU) is LDA by N
2888: *
2889: LDWRKU = LDA
2890: ELSE
2891: *
2892: * WORK(IU) is LDA by M
2893: *
2894: LDWRKU = M
2895: END IF
2896: ITAU = IU + LDWRKU*M
2897: IWORK = ITAU + M
2898: *
2899: * Compute A=L*Q
2900: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2901: * (RWorkspace: 0)
2902: *
2903: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2904: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2905: *
2906: * Copy L to WORK(IU), zeroing out above it
2907: *
2908: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
2909: $ LDWRKU )
2910: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2911: $ WORK( IU+LDWRKU ), LDWRKU )
2912: *
2913: * Generate Q in A
2914: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2915: * (RWorkspace: 0)
2916: *
2917: CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2918: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2919: IE = 1
2920: ITAUQ = ITAU
2921: ITAUP = ITAUQ + M
2922: IWORK = ITAUP + M
2923: *
2924: * Bidiagonalize L in WORK(IU), copying result to U
2925: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2926: * (RWorkspace: need M)
2927: *
2928: CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
2929: $ RWORK( IE ), WORK( ITAUQ ),
2930: $ WORK( ITAUP ), WORK( IWORK ),
2931: $ LWORK-IWORK+1, IERR )
2932: CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2933: $ LDU )
2934: *
2935: * Generate right bidiagonalizing vectors in WORK(IU)
2936: * (CWorkspace: need M*M+3*M-1,
2937: * prefer M*M+2*M+(M-1)*NB)
2938: * (RWorkspace: 0)
2939: *
2940: CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2941: $ WORK( ITAUP ), WORK( IWORK ),
2942: $ LWORK-IWORK+1, IERR )
2943: *
2944: * Generate left bidiagonalizing vectors in U
2945: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2946: * (RWorkspace: 0)
2947: *
2948: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2949: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2950: IRWORK = IE + M
2951: *
2952: * Perform bidiagonal QR iteration, computing left
2953: * singular vectors of L in U and computing right
2954: * singular vectors of L in WORK(IU)
2955: * (CWorkspace: need M*M)
2956: * (RWorkspace: need BDSPAC)
2957: *
2958: CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2959: $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
2960: $ RWORK( IRWORK ), INFO )
2961: *
2962: * Multiply right singular vectors of L in WORK(IU) by
2963: * Q in A, storing result in VT
2964: * (CWorkspace: need M*M)
2965: * (RWorkspace: 0)
2966: *
2967: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2968: $ LDWRKU, A, LDA, CZERO, VT, LDVT )
2969: *
2970: ELSE
2971: *
2972: * Insufficient workspace for a fast algorithm
2973: *
2974: ITAU = 1
2975: IWORK = ITAU + M
2976: *
2977: * Compute A=L*Q, copying result to VT
2978: * (CWorkspace: need 2*M, prefer M+M*NB)
2979: * (RWorkspace: 0)
2980: *
2981: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
2982: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2983: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
2984: *
2985: * Generate Q in VT
2986: * (CWorkspace: need 2*M, prefer M+M*NB)
2987: * (RWorkspace: 0)
2988: *
2989: CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2990: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2991: *
2992: * Copy L to U, zeroing out above it
2993: *
2994: CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
2995: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
2996: $ U( 1, 2 ), LDU )
2997: IE = 1
2998: ITAUQ = ITAU
2999: ITAUP = ITAUQ + M
3000: IWORK = ITAUP + M
3001: *
3002: * Bidiagonalize L in U
3003: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3004: * (RWorkspace: need M)
3005: *
3006: CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
3007: $ WORK( ITAUQ ), WORK( ITAUP ),
3008: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3009: *
3010: * Multiply right bidiagonalizing vectors in U by Q
3011: * in VT
3012: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3013: * (RWorkspace: 0)
3014: *
3015: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3016: $ WORK( ITAUP ), VT, LDVT,
3017: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3018: *
3019: * Generate left bidiagonalizing vectors in U
3020: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3021: * (RWorkspace: 0)
3022: *
3023: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3024: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3025: IRWORK = IE + M
3026: *
3027: * Perform bidiagonal QR iteration, computing left
3028: * singular vectors of A in U and computing right
3029: * singular vectors of A in VT
3030: * (CWorkspace: 0)
3031: * (RWorkspace: need BDSPAC)
3032: *
3033: CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3034: $ LDVT, U, LDU, CDUM, 1,
3035: $ RWORK( IRWORK ), INFO )
3036: *
3037: END IF
3038: *
3039: END IF
3040: *
3041: ELSE IF( WNTVA ) THEN
3042: *
3043: IF( WNTUN ) THEN
3044: *
3045: * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3046: * N right singular vectors to be computed in VT and
3047: * no left singular vectors to be computed
3048: *
3049: IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3050: *
3051: * Sufficient workspace for a fast algorithm
3052: *
3053: IR = 1
3054: IF( LWORK.GE.WRKBL+LDA*M ) THEN
3055: *
3056: * WORK(IR) is LDA by M
3057: *
3058: LDWRKR = LDA
3059: ELSE
3060: *
3061: * WORK(IR) is M by M
3062: *
3063: LDWRKR = M
3064: END IF
3065: ITAU = IR + LDWRKR*M
3066: IWORK = ITAU + M
3067: *
3068: * Compute A=L*Q, copying result to VT
3069: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3070: * (RWorkspace: 0)
3071: *
3072: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3073: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3074: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3075: *
3076: * Copy L to WORK(IR), zeroing out above it
3077: *
3078: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
3079: $ LDWRKR )
3080: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3081: $ WORK( IR+LDWRKR ), LDWRKR )
3082: *
3083: * Generate Q in VT
3084: * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3085: * (RWorkspace: 0)
3086: *
3087: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3088: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3089: IE = 1
3090: ITAUQ = ITAU
3091: ITAUP = ITAUQ + M
3092: IWORK = ITAUP + M
3093: *
3094: * Bidiagonalize L in WORK(IR)
3095: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3096: * (RWorkspace: need M)
3097: *
3098: CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
3099: $ RWORK( IE ), WORK( ITAUQ ),
3100: $ WORK( ITAUP ), WORK( IWORK ),
3101: $ LWORK-IWORK+1, IERR )
3102: *
3103: * Generate right bidiagonalizing vectors in WORK(IR)
3104: * (CWorkspace: need M*M+3*M-1,
3105: * prefer M*M+2*M+(M-1)*NB)
3106: * (RWorkspace: 0)
3107: *
3108: CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
3109: $ WORK( ITAUP ), WORK( IWORK ),
3110: $ LWORK-IWORK+1, IERR )
3111: IRWORK = IE + M
3112: *
3113: * Perform bidiagonal QR iteration, computing right
3114: * singular vectors of L in WORK(IR)
3115: * (CWorkspace: need M*M)
3116: * (RWorkspace: need BDSPAC)
3117: *
3118: CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
3119: $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
3120: $ RWORK( IRWORK ), INFO )
3121: *
3122: * Multiply right singular vectors of L in WORK(IR) by
3123: * Q in VT, storing result in A
3124: * (CWorkspace: need M*M)
3125: * (RWorkspace: 0)
3126: *
3127: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
3128: $ LDWRKR, VT, LDVT, CZERO, A, LDA )
3129: *
3130: * Copy right singular vectors of A from A to VT
3131: *
3132: CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3133: *
3134: ELSE
3135: *
3136: * Insufficient workspace for a fast algorithm
3137: *
3138: ITAU = 1
3139: IWORK = ITAU + M
3140: *
3141: * Compute A=L*Q, copying result to VT
3142: * (CWorkspace: need 2*M, prefer M+M*NB)
3143: * (RWorkspace: 0)
3144: *
3145: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3146: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3147: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3148: *
3149: * Generate Q in VT
3150: * (CWorkspace: need M+N, prefer M+N*NB)
3151: * (RWorkspace: 0)
3152: *
3153: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3154: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3155: IE = 1
3156: ITAUQ = ITAU
3157: ITAUP = ITAUQ + M
3158: IWORK = ITAUP + M
3159: *
3160: * Zero out above L in A
3161: *
3162: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3163: $ A( 1, 2 ), LDA )
3164: *
3165: * Bidiagonalize L in A
3166: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3167: * (RWorkspace: need M)
3168: *
3169: CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
3170: $ WORK( ITAUQ ), WORK( ITAUP ),
3171: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3172: *
3173: * Multiply right bidiagonalizing vectors in A by Q
3174: * in VT
3175: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3176: * (RWorkspace: 0)
3177: *
3178: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3179: $ WORK( ITAUP ), VT, LDVT,
3180: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3181: IRWORK = IE + M
3182: *
3183: * Perform bidiagonal QR iteration, computing right
3184: * singular vectors of A in VT
3185: * (CWorkspace: 0)
3186: * (RWorkspace: need BDSPAC)
3187: *
3188: CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
3189: $ LDVT, CDUM, 1, CDUM, 1,
3190: $ RWORK( IRWORK ), INFO )
3191: *
3192: END IF
3193: *
3194: ELSE IF( WNTUO ) THEN
3195: *
3196: * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3197: * N right singular vectors to be computed in VT and
3198: * M left singular vectors to be overwritten on A
3199: *
3200: IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
3201: *
3202: * Sufficient workspace for a fast algorithm
3203: *
3204: IU = 1
3205: IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3206: *
3207: * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3208: *
3209: LDWRKU = LDA
3210: IR = IU + LDWRKU*M
3211: LDWRKR = LDA
3212: ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3213: *
3214: * WORK(IU) is LDA by M and WORK(IR) is M by M
3215: *
3216: LDWRKU = LDA
3217: IR = IU + LDWRKU*M
3218: LDWRKR = M
3219: ELSE
3220: *
3221: * WORK(IU) is M by M and WORK(IR) is M by M
3222: *
3223: LDWRKU = M
3224: IR = IU + LDWRKU*M
3225: LDWRKR = M
3226: END IF
3227: ITAU = IR + LDWRKR*M
3228: IWORK = ITAU + M
3229: *
3230: * Compute A=L*Q, copying result to VT
3231: * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3232: * (RWorkspace: 0)
3233: *
3234: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3235: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3236: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3237: *
3238: * Generate Q in VT
3239: * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3240: * (RWorkspace: 0)
3241: *
3242: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3243: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3244: *
3245: * Copy L to WORK(IU), zeroing out above it
3246: *
3247: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
3248: $ LDWRKU )
3249: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3250: $ WORK( IU+LDWRKU ), LDWRKU )
3251: IE = 1
3252: ITAUQ = ITAU
3253: ITAUP = ITAUQ + M
3254: IWORK = ITAUP + M
3255: *
3256: * Bidiagonalize L in WORK(IU), copying result to
3257: * WORK(IR)
3258: * (CWorkspace: need 2*M*M+3*M,
3259: * prefer 2*M*M+2*M+2*M*NB)
3260: * (RWorkspace: need M)
3261: *
3262: CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
3263: $ RWORK( IE ), WORK( ITAUQ ),
3264: $ WORK( ITAUP ), WORK( IWORK ),
3265: $ LWORK-IWORK+1, IERR )
3266: CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3267: $ WORK( IR ), LDWRKR )
3268: *
3269: * Generate right bidiagonalizing vectors in WORK(IU)
3270: * (CWorkspace: need 2*M*M+3*M-1,
3271: * prefer 2*M*M+2*M+(M-1)*NB)
3272: * (RWorkspace: 0)
3273: *
3274: CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3275: $ WORK( ITAUP ), WORK( IWORK ),
3276: $ LWORK-IWORK+1, IERR )
3277: *
3278: * Generate left bidiagonalizing vectors in WORK(IR)
3279: * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3280: * (RWorkspace: 0)
3281: *
3282: CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3283: $ WORK( ITAUQ ), WORK( IWORK ),
3284: $ LWORK-IWORK+1, IERR )
3285: IRWORK = IE + M
3286: *
3287: * Perform bidiagonal QR iteration, computing left
3288: * singular vectors of L in WORK(IR) and computing
3289: * right singular vectors of L in WORK(IU)
3290: * (CWorkspace: need 2*M*M)
3291: * (RWorkspace: need BDSPAC)
3292: *
3293: CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3294: $ WORK( IU ), LDWRKU, WORK( IR ),
3295: $ LDWRKR, CDUM, 1, RWORK( IRWORK ),
3296: $ INFO )
3297: *
3298: * Multiply right singular vectors of L in WORK(IU) by
3299: * Q in VT, storing result in A
3300: * (CWorkspace: need M*M)
3301: * (RWorkspace: 0)
3302: *
3303: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3304: $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3305: *
3306: * Copy right singular vectors of A from A to VT
3307: *
3308: CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3309: *
3310: * Copy left singular vectors of A from WORK(IR) to A
3311: *
3312: CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3313: $ LDA )
3314: *
3315: ELSE
3316: *
3317: * Insufficient workspace for a fast algorithm
3318: *
3319: ITAU = 1
3320: IWORK = ITAU + M
3321: *
3322: * Compute A=L*Q, copying result to VT
3323: * (CWorkspace: need 2*M, prefer M+M*NB)
3324: * (RWorkspace: 0)
3325: *
3326: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3327: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3328: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3329: *
3330: * Generate Q in VT
3331: * (CWorkspace: need M+N, prefer M+N*NB)
3332: * (RWorkspace: 0)
3333: *
3334: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3335: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3336: IE = 1
3337: ITAUQ = ITAU
3338: ITAUP = ITAUQ + M
3339: IWORK = ITAUP + M
3340: *
3341: * Zero out above L in A
3342: *
3343: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3344: $ A( 1, 2 ), LDA )
3345: *
3346: * Bidiagonalize L in A
3347: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3348: * (RWorkspace: need M)
3349: *
3350: CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
3351: $ WORK( ITAUQ ), WORK( ITAUP ),
3352: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3353: *
3354: * Multiply right bidiagonalizing vectors in A by Q
3355: * in VT
3356: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3357: * (RWorkspace: 0)
3358: *
3359: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3360: $ WORK( ITAUP ), VT, LDVT,
3361: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3362: *
3363: * Generate left bidiagonalizing vectors in A
3364: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3365: * (RWorkspace: 0)
3366: *
3367: CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3368: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3369: IRWORK = IE + M
3370: *
3371: * Perform bidiagonal QR iteration, computing left
3372: * singular vectors of A in A and computing right
3373: * singular vectors of A in VT
3374: * (CWorkspace: 0)
3375: * (RWorkspace: need BDSPAC)
3376: *
3377: CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3378: $ LDVT, A, LDA, CDUM, 1,
3379: $ RWORK( IRWORK ), INFO )
3380: *
3381: END IF
3382: *
3383: ELSE IF( WNTUAS ) THEN
3384: *
3385: * Path 9t(N much larger than M, JOBU='S' or 'A',
3386: * JOBVT='A')
3387: * N right singular vectors to be computed in VT and
3388: * M left singular vectors to be computed in U
3389: *
3390: IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3391: *
3392: * Sufficient workspace for a fast algorithm
3393: *
3394: IU = 1
3395: IF( LWORK.GE.WRKBL+LDA*M ) THEN
3396: *
3397: * WORK(IU) is LDA by M
3398: *
3399: LDWRKU = LDA
3400: ELSE
3401: *
3402: * WORK(IU) is M by M
3403: *
3404: LDWRKU = M
3405: END IF
3406: ITAU = IU + LDWRKU*M
3407: IWORK = ITAU + M
3408: *
3409: * Compute A=L*Q, copying result to VT
3410: * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3411: * (RWorkspace: 0)
3412: *
3413: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3414: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3415: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3416: *
3417: * Generate Q in VT
3418: * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3419: * (RWorkspace: 0)
3420: *
3421: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3422: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3423: *
3424: * Copy L to WORK(IU), zeroing out above it
3425: *
3426: CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
3427: $ LDWRKU )
3428: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3429: $ WORK( IU+LDWRKU ), LDWRKU )
3430: IE = 1
3431: ITAUQ = ITAU
3432: ITAUP = ITAUQ + M
3433: IWORK = ITAUP + M
3434: *
3435: * Bidiagonalize L in WORK(IU), copying result to U
3436: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3437: * (RWorkspace: need M)
3438: *
3439: CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
3440: $ RWORK( IE ), WORK( ITAUQ ),
3441: $ WORK( ITAUP ), WORK( IWORK ),
3442: $ LWORK-IWORK+1, IERR )
3443: CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3444: $ LDU )
3445: *
3446: * Generate right bidiagonalizing vectors in WORK(IU)
3447: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3448: * (RWorkspace: 0)
3449: *
3450: CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3451: $ WORK( ITAUP ), WORK( IWORK ),
3452: $ LWORK-IWORK+1, IERR )
3453: *
3454: * Generate left bidiagonalizing vectors in U
3455: * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3456: * (RWorkspace: 0)
3457: *
3458: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3459: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3460: IRWORK = IE + M
3461: *
3462: * Perform bidiagonal QR iteration, computing left
3463: * singular vectors of L in U and computing right
3464: * singular vectors of L in WORK(IU)
3465: * (CWorkspace: need M*M)
3466: * (RWorkspace: need BDSPAC)
3467: *
3468: CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3469: $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
3470: $ RWORK( IRWORK ), INFO )
3471: *
3472: * Multiply right singular vectors of L in WORK(IU) by
3473: * Q in VT, storing result in A
3474: * (CWorkspace: need M*M)
3475: * (RWorkspace: 0)
3476: *
3477: CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3478: $ LDWRKU, VT, LDVT, CZERO, A, LDA )
3479: *
3480: * Copy right singular vectors of A from A to VT
3481: *
3482: CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
3483: *
3484: ELSE
3485: *
3486: * Insufficient workspace for a fast algorithm
3487: *
3488: ITAU = 1
3489: IWORK = ITAU + M
3490: *
3491: * Compute A=L*Q, copying result to VT
3492: * (CWorkspace: need 2*M, prefer M+M*NB)
3493: * (RWorkspace: 0)
3494: *
3495: CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
3496: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3497: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3498: *
3499: * Generate Q in VT
3500: * (CWorkspace: need M+N, prefer M+N*NB)
3501: * (RWorkspace: 0)
3502: *
3503: CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3504: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3505: *
3506: * Copy L to U, zeroing out above it
3507: *
3508: CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
3509: CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
3510: $ U( 1, 2 ), LDU )
3511: IE = 1
3512: ITAUQ = ITAU
3513: ITAUP = ITAUQ + M
3514: IWORK = ITAUP + M
3515: *
3516: * Bidiagonalize L in U
3517: * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3518: * (RWorkspace: need M)
3519: *
3520: CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
3521: $ WORK( ITAUQ ), WORK( ITAUP ),
3522: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3523: *
3524: * Multiply right bidiagonalizing vectors in U by Q
3525: * in VT
3526: * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3527: * (RWorkspace: 0)
3528: *
3529: CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3530: $ WORK( ITAUP ), VT, LDVT,
3531: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3532: *
3533: * Generate left bidiagonalizing vectors in U
3534: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3535: * (RWorkspace: 0)
3536: *
3537: CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3538: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3539: IRWORK = IE + M
3540: *
3541: * Perform bidiagonal QR iteration, computing left
3542: * singular vectors of A in U and computing right
3543: * singular vectors of A in VT
3544: * (CWorkspace: 0)
3545: * (RWorkspace: need BDSPAC)
3546: *
3547: CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3548: $ LDVT, U, LDU, CDUM, 1,
3549: $ RWORK( IRWORK ), INFO )
3550: *
3551: END IF
3552: *
3553: END IF
3554: *
3555: END IF
3556: *
3557: ELSE
3558: *
3559: * N .LT. MNTHR
3560: *
3561: * Path 10t(N greater than M, but not much larger)
3562: * Reduce to bidiagonal form without LQ decomposition
3563: *
3564: IE = 1
3565: ITAUQ = 1
3566: ITAUP = ITAUQ + M
3567: IWORK = ITAUP + M
3568: *
3569: * Bidiagonalize A
3570: * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3571: * (RWorkspace: M)
3572: *
3573: CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
3574: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3575: $ IERR )
3576: IF( WNTUAS ) THEN
3577: *
3578: * If left singular vectors desired in U, copy result to U
3579: * and generate left bidiagonalizing vectors in U
3580: * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3581: * (RWorkspace: 0)
3582: *
3583: CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
3584: CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3585: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3586: END IF
3587: IF( WNTVAS ) THEN
3588: *
3589: * If right singular vectors desired in VT, copy result to
3590: * VT and generate right bidiagonalizing vectors in VT
3591: * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3592: * (RWorkspace: 0)
3593: *
3594: CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
3595: IF( WNTVA )
3596: $ NRVT = N
3597: IF( WNTVS )
3598: $ NRVT = M
3599: CALL ZUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3600: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3601: END IF
3602: IF( WNTUO ) THEN
3603: *
3604: * If left singular vectors desired in A, generate left
3605: * bidiagonalizing vectors in A
3606: * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3607: * (RWorkspace: 0)
3608: *
3609: CALL ZUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3610: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3611: END IF
3612: IF( WNTVO ) THEN
3613: *
3614: * If right singular vectors desired in A, generate right
3615: * bidiagonalizing vectors in A
3616: * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3617: * (RWorkspace: 0)
3618: *
3619: CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3620: $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3621: END IF
3622: IRWORK = IE + M
3623: IF( WNTUAS .OR. WNTUO )
3624: $ NRU = M
3625: IF( WNTUN )
3626: $ NRU = 0
3627: IF( WNTVAS .OR. WNTVO )
3628: $ NCVT = N
3629: IF( WNTVN )
3630: $ NCVT = 0
3631: IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3632: *
3633: * Perform bidiagonal QR iteration, if desired, computing
3634: * left singular vectors in U and computing right singular
3635: * vectors in VT
3636: * (CWorkspace: 0)
3637: * (RWorkspace: need BDSPAC)
3638: *
3639: CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3640: $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
3641: $ INFO )
3642: ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3643: *
3644: * Perform bidiagonal QR iteration, if desired, computing
3645: * left singular vectors in U and computing right singular
3646: * vectors in A
3647: * (CWorkspace: 0)
3648: * (RWorkspace: need BDSPAC)
3649: *
3650: CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
3651: $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
3652: $ INFO )
3653: ELSE
3654: *
3655: * Perform bidiagonal QR iteration, if desired, computing
3656: * left singular vectors in A and computing right singular
3657: * vectors in VT
3658: * (CWorkspace: 0)
3659: * (RWorkspace: need BDSPAC)
3660: *
3661: CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3662: $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
3663: $ INFO )
3664: END IF
3665: *
3666: END IF
3667: *
3668: END IF
3669: *
3670: * Undo scaling if necessary
3671: *
3672: IF( ISCL.EQ.1 ) THEN
3673: IF( ANRM.GT.BIGNUM )
3674: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3675: $ IERR )
3676: IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3677: $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
3678: $ RWORK( IE ), MINMN, IERR )
3679: IF( ANRM.LT.SMLNUM )
3680: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3681: $ IERR )
3682: IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3683: $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
3684: $ RWORK( IE ), MINMN, IERR )
3685: END IF
3686: *
3687: * Return optimal workspace in WORK(1)
3688: *
3689: WORK( 1 ) = MAXWRK
3690: *
3691: RETURN
3692: *
3693: * End of ZGESVD
3694: *
3695: END
CVSweb interface <joel.bertrand@systella.fr>