Annotation of rpl/lapack/lapack/dgelss.f, revision 1.18
1.9 bertrand 1: *> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
2: *
3: * =========== DOCUMENTATION ===========
4: *
1.15 bertrand 5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
1.9 bertrand 7: *
8: *> \htmlonly
1.15 bertrand 9: *> Download DGELSS + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
1.9 bertrand 15: *> [TXT]</a>
1.15 bertrand 16: *> \endhtmlonly
1.9 bertrand 17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
22: * WORK, LWORK, INFO )
1.15 bertrand 23: *
1.9 bertrand 24: * .. Scalar Arguments ..
25: * INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
26: * DOUBLE PRECISION RCOND
27: * ..
28: * .. Array Arguments ..
29: * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
30: * ..
1.15 bertrand 31: *
1.9 bertrand 32: *
33: *> \par Purpose:
34: * =============
35: *>
36: *> \verbatim
37: *>
38: *> DGELSS computes the minimum norm solution to a real linear least
39: *> squares problem:
40: *>
41: *> Minimize 2-norm(| b - A*x |).
42: *>
43: *> using the singular value decomposition (SVD) of A. A is an M-by-N
44: *> matrix which may be rank-deficient.
45: *>
46: *> Several right hand side vectors b and solution vectors x can be
47: *> handled in a single call; they are stored as the columns of the
48: *> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
49: *> X.
50: *>
51: *> The effective rank of A is determined by treating as zero those
52: *> singular values which are less than RCOND times the largest singular
53: *> value.
54: *> \endverbatim
55: *
56: * Arguments:
57: * ==========
58: *
59: *> \param[in] M
60: *> \verbatim
61: *> M is INTEGER
62: *> The number of rows of the matrix A. M >= 0.
63: *> \endverbatim
64: *>
65: *> \param[in] N
66: *> \verbatim
67: *> N is INTEGER
68: *> The number of columns of the matrix A. N >= 0.
69: *> \endverbatim
70: *>
71: *> \param[in] NRHS
72: *> \verbatim
73: *> NRHS is INTEGER
74: *> The number of right hand sides, i.e., the number of columns
75: *> of the matrices B and X. NRHS >= 0.
76: *> \endverbatim
77: *>
78: *> \param[in,out] A
79: *> \verbatim
80: *> A is DOUBLE PRECISION array, dimension (LDA,N)
81: *> On entry, the M-by-N matrix A.
82: *> On exit, the first min(m,n) rows of A are overwritten with
83: *> its right singular vectors, stored rowwise.
84: *> \endverbatim
85: *>
86: *> \param[in] LDA
87: *> \verbatim
88: *> LDA is INTEGER
89: *> The leading dimension of the array A. LDA >= max(1,M).
90: *> \endverbatim
91: *>
92: *> \param[in,out] B
93: *> \verbatim
94: *> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
95: *> On entry, the M-by-NRHS right hand side matrix B.
96: *> On exit, B is overwritten by the N-by-NRHS solution
97: *> matrix X. If m >= n and RANK = n, the residual
98: *> sum-of-squares for the solution in the i-th column is given
99: *> by the sum of squares of elements n+1:m in that column.
100: *> \endverbatim
101: *>
102: *> \param[in] LDB
103: *> \verbatim
104: *> LDB is INTEGER
105: *> The leading dimension of the array B. LDB >= max(1,max(M,N)).
106: *> \endverbatim
107: *>
108: *> \param[out] S
109: *> \verbatim
110: *> S is DOUBLE PRECISION array, dimension (min(M,N))
111: *> The singular values of A in decreasing order.
112: *> The condition number of A in the 2-norm = S(1)/S(min(m,n)).
113: *> \endverbatim
114: *>
115: *> \param[in] RCOND
116: *> \verbatim
117: *> RCOND is DOUBLE PRECISION
118: *> RCOND is used to determine the effective rank of A.
119: *> Singular values S(i) <= RCOND*S(1) are treated as zero.
120: *> If RCOND < 0, machine precision is used instead.
121: *> \endverbatim
122: *>
123: *> \param[out] RANK
124: *> \verbatim
125: *> RANK is INTEGER
126: *> The effective rank of A, i.e., the number of singular values
127: *> which are greater than RCOND*S(1).
128: *> \endverbatim
129: *>
130: *> \param[out] WORK
131: *> \verbatim
132: *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
133: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
134: *> \endverbatim
135: *>
136: *> \param[in] LWORK
137: *> \verbatim
138: *> LWORK is INTEGER
139: *> The dimension of the array WORK. LWORK >= 1, and also:
140: *> LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
141: *> For good performance, LWORK should generally be larger.
142: *>
143: *> If LWORK = -1, then a workspace query is assumed; the routine
144: *> only calculates the optimal size of the WORK array, returns
145: *> this value as the first entry of the WORK array, and no error
146: *> message related to LWORK is issued by XERBLA.
147: *> \endverbatim
148: *>
149: *> \param[out] INFO
150: *> \verbatim
151: *> INFO is INTEGER
152: *> = 0: successful exit
153: *> < 0: if INFO = -i, the i-th argument had an illegal value.
154: *> > 0: the algorithm for computing the SVD failed to converge;
155: *> if INFO = i, i off-diagonal elements of an intermediate
156: *> bidiagonal form did not converge to zero.
157: *> \endverbatim
158: *
159: * Authors:
160: * ========
161: *
1.15 bertrand 162: *> \author Univ. of Tennessee
163: *> \author Univ. of California Berkeley
164: *> \author Univ. of Colorado Denver
165: *> \author NAG Ltd.
1.9 bertrand 166: *
167: *> \ingroup doubleGEsolve
168: *
169: * =====================================================================
1.1 bertrand 170: SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
171: $ WORK, LWORK, INFO )
172: *
1.18 ! bertrand 173: * -- LAPACK driver routine --
1.1 bertrand 174: * -- LAPACK is a software package provided by Univ. of Tennessee, --
175: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176: *
177: * .. Scalar Arguments ..
178: INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
179: DOUBLE PRECISION RCOND
180: * ..
181: * .. Array Arguments ..
182: DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
183: * ..
184: *
185: * =====================================================================
186: *
187: * .. Parameters ..
188: DOUBLE PRECISION ZERO, ONE
189: PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
190: * ..
191: * .. Local Scalars ..
192: LOGICAL LQUERY
193: INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
194: $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
195: $ MAXWRK, MINMN, MINWRK, MM, MNTHR
1.9 bertrand 196: INTEGER LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
197: $ LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
198: $ LWORK_DGELQF
1.1 bertrand 199: DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
200: * ..
201: * .. Local Arrays ..
1.9 bertrand 202: DOUBLE PRECISION DUM( 1 )
1.1 bertrand 203: * ..
204: * .. External Subroutines ..
205: EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
206: $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
207: $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
208: * ..
209: * .. External Functions ..
210: INTEGER ILAENV
211: DOUBLE PRECISION DLAMCH, DLANGE
212: EXTERNAL ILAENV, DLAMCH, DLANGE
213: * ..
214: * .. Intrinsic Functions ..
215: INTRINSIC MAX, MIN
216: * ..
217: * .. Executable Statements ..
218: *
219: * Test the input arguments
220: *
221: INFO = 0
222: MINMN = MIN( M, N )
223: MAXMN = MAX( M, N )
224: LQUERY = ( LWORK.EQ.-1 )
225: IF( M.LT.0 ) THEN
226: INFO = -1
227: ELSE IF( N.LT.0 ) THEN
228: INFO = -2
229: ELSE IF( NRHS.LT.0 ) THEN
230: INFO = -3
231: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
232: INFO = -5
233: ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
234: INFO = -7
235: END IF
236: *
237: * Compute workspace
238: * (Note: Comments in the code beginning "Workspace:" describe the
239: * minimal amount of workspace needed at that point in the code,
240: * as well as the preferred amount for good performance.
241: * NB refers to the optimal block size for the immediately
242: * following subroutine, as returned by ILAENV.)
243: *
244: IF( INFO.EQ.0 ) THEN
245: MINWRK = 1
246: MAXWRK = 1
247: IF( MINMN.GT.0 ) THEN
248: MM = M
249: MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
250: IF( M.GE.N .AND. M.GE.MNTHR ) THEN
251: *
252: * Path 1a - overdetermined, with many more rows than
253: * columns
254: *
1.9 bertrand 255: * Compute space needed for DGEQRF
256: CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
1.18 ! bertrand 257: LWORK_DGEQRF = INT( DUM(1) )
1.9 bertrand 258: * Compute space needed for DORMQR
259: CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
260: $ LDB, DUM(1), -1, INFO )
1.18 ! bertrand 261: LWORK_DORMQR = INT( DUM(1) )
1.1 bertrand 262: MM = N
1.9 bertrand 263: MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
264: MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
1.1 bertrand 265: END IF
266: IF( M.GE.N ) THEN
267: *
268: * Path 1 - overdetermined or exactly determined
269: *
270: * Compute workspace needed for DBDSQR
271: *
272: BDSPAC = MAX( 1, 5*N )
1.9 bertrand 273: * Compute space needed for DGEBRD
274: CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
275: $ DUM(1), DUM(1), -1, INFO )
1.18 ! bertrand 276: LWORK_DGEBRD = INT( DUM(1) )
1.9 bertrand 277: * Compute space needed for DORMBR
278: CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
279: $ B, LDB, DUM(1), -1, INFO )
1.18 ! bertrand 280: LWORK_DORMBR = INT( DUM(1) )
1.9 bertrand 281: * Compute space needed for DORGBR
282: CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
283: $ DUM(1), -1, INFO )
1.18 ! bertrand 284: LWORK_DORGBR = INT( DUM(1) )
1.15 bertrand 285: * Compute total workspace needed
1.9 bertrand 286: MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
287: MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
288: MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
1.1 bertrand 289: MAXWRK = MAX( MAXWRK, BDSPAC )
290: MAXWRK = MAX( MAXWRK, N*NRHS )
291: MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
292: MAXWRK = MAX( MINWRK, MAXWRK )
293: END IF
294: IF( N.GT.M ) THEN
295: *
296: * Compute workspace needed for DBDSQR
297: *
298: BDSPAC = MAX( 1, 5*M )
299: MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
300: IF( N.GE.MNTHR ) THEN
301: *
302: * Path 2a - underdetermined, with many more columns
303: * than rows
304: *
1.9 bertrand 305: * Compute space needed for DGELQF
306: CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
307: $ -1, INFO )
1.18 ! bertrand 308: LWORK_DGELQF = INT( DUM(1) )
1.9 bertrand 309: * Compute space needed for DGEBRD
310: CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
311: $ DUM(1), DUM(1), -1, INFO )
1.18 ! bertrand 312: LWORK_DGEBRD = INT( DUM(1) )
1.9 bertrand 313: * Compute space needed for DORMBR
1.15 bertrand 314: CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
1.9 bertrand 315: $ DUM(1), B, LDB, DUM(1), -1, INFO )
1.18 ! bertrand 316: LWORK_DORMBR = INT( DUM(1) )
1.9 bertrand 317: * Compute space needed for DORGBR
318: CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
319: $ DUM(1), -1, INFO )
1.18 ! bertrand 320: LWORK_DORGBR = INT( DUM(1) )
1.9 bertrand 321: * Compute space needed for DORMLQ
322: CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
323: $ B, LDB, DUM(1), -1, INFO )
1.18 ! bertrand 324: LWORK_DORMLQ = INT( DUM(1) )
1.15 bertrand 325: * Compute total workspace needed
1.9 bertrand 326: MAXWRK = M + LWORK_DGELQF
327: MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
328: MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
329: MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
1.1 bertrand 330: MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
331: IF( NRHS.GT.1 ) THEN
332: MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
333: ELSE
334: MAXWRK = MAX( MAXWRK, M*M + 2*M )
335: END IF
1.9 bertrand 336: MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
1.1 bertrand 337: ELSE
338: *
339: * Path 2 - underdetermined
340: *
1.9 bertrand 341: * Compute space needed for DGEBRD
342: CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
343: $ DUM(1), DUM(1), -1, INFO )
1.18 ! bertrand 344: LWORK_DGEBRD = INT( DUM(1) )
1.9 bertrand 345: * Compute space needed for DORMBR
1.15 bertrand 346: CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
1.9 bertrand 347: $ DUM(1), B, LDB, DUM(1), -1, INFO )
1.18 ! bertrand 348: LWORK_DORMBR = INT( DUM(1) )
1.9 bertrand 349: * Compute space needed for DORGBR
350: CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
351: $ DUM(1), -1, INFO )
1.18 ! bertrand 352: LWORK_DORGBR = INT( DUM(1) )
1.9 bertrand 353: MAXWRK = 3*M + LWORK_DGEBRD
354: MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
355: MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
1.1 bertrand 356: MAXWRK = MAX( MAXWRK, BDSPAC )
357: MAXWRK = MAX( MAXWRK, N*NRHS )
358: END IF
359: END IF
360: MAXWRK = MAX( MINWRK, MAXWRK )
361: END IF
362: WORK( 1 ) = MAXWRK
363: *
364: IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
365: $ INFO = -12
366: END IF
367: *
368: IF( INFO.NE.0 ) THEN
369: CALL XERBLA( 'DGELSS', -INFO )
370: RETURN
371: ELSE IF( LQUERY ) THEN
372: RETURN
373: END IF
374: *
375: * Quick return if possible
376: *
377: IF( M.EQ.0 .OR. N.EQ.0 ) THEN
378: RANK = 0
379: RETURN
380: END IF
381: *
382: * Get machine parameters
383: *
384: EPS = DLAMCH( 'P' )
385: SFMIN = DLAMCH( 'S' )
386: SMLNUM = SFMIN / EPS
387: BIGNUM = ONE / SMLNUM
388: CALL DLABAD( SMLNUM, BIGNUM )
389: *
390: * Scale A if max element outside range [SMLNUM,BIGNUM]
391: *
392: ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
393: IASCL = 0
394: IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
395: *
396: * Scale matrix norm up to SMLNUM
397: *
398: CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
399: IASCL = 1
400: ELSE IF( ANRM.GT.BIGNUM ) THEN
401: *
402: * Scale matrix norm down to BIGNUM
403: *
404: CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
405: IASCL = 2
406: ELSE IF( ANRM.EQ.ZERO ) THEN
407: *
408: * Matrix all zero. Return zero solution.
409: *
410: CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
1.5 bertrand 411: CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
1.1 bertrand 412: RANK = 0
413: GO TO 70
414: END IF
415: *
416: * Scale B if max element outside range [SMLNUM,BIGNUM]
417: *
418: BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
419: IBSCL = 0
420: IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
421: *
422: * Scale matrix norm up to SMLNUM
423: *
424: CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
425: IBSCL = 1
426: ELSE IF( BNRM.GT.BIGNUM ) THEN
427: *
428: * Scale matrix norm down to BIGNUM
429: *
430: CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
431: IBSCL = 2
432: END IF
433: *
434: * Overdetermined case
435: *
436: IF( M.GE.N ) THEN
437: *
438: * Path 1 - overdetermined or exactly determined
439: *
440: MM = M
441: IF( M.GE.MNTHR ) THEN
442: *
443: * Path 1a - overdetermined, with many more rows than columns
444: *
445: MM = N
446: ITAU = 1
447: IWORK = ITAU + N
448: *
449: * Compute A=Q*R
450: * (Workspace: need 2*N, prefer N+N*NB)
451: *
452: CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
453: $ LWORK-IWORK+1, INFO )
454: *
455: * Multiply B by transpose(Q)
456: * (Workspace: need N+NRHS, prefer N+NRHS*NB)
457: *
458: CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
459: $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
460: *
461: * Zero out below R
462: *
463: IF( N.GT.1 )
464: $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
465: END IF
466: *
467: IE = 1
468: ITAUQ = IE + N
469: ITAUP = ITAUQ + N
470: IWORK = ITAUP + N
471: *
472: * Bidiagonalize R in A
473: * (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
474: *
475: CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
476: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
477: $ INFO )
478: *
479: * Multiply B by transpose of left bidiagonalizing vectors of R
480: * (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
481: *
482: CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
483: $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
484: *
485: * Generate right bidiagonalizing vectors of R in A
486: * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
487: *
488: CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
489: $ WORK( IWORK ), LWORK-IWORK+1, INFO )
490: IWORK = IE + N
491: *
492: * Perform bidiagonal QR iteration
493: * multiply B by transpose of left singular vectors
494: * compute right singular vectors in A
495: * (Workspace: need BDSPAC)
496: *
1.9 bertrand 497: CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
1.1 bertrand 498: $ 1, B, LDB, WORK( IWORK ), INFO )
499: IF( INFO.NE.0 )
500: $ GO TO 70
501: *
502: * Multiply B by reciprocals of singular values
503: *
504: THR = MAX( RCOND*S( 1 ), SFMIN )
505: IF( RCOND.LT.ZERO )
506: $ THR = MAX( EPS*S( 1 ), SFMIN )
507: RANK = 0
508: DO 10 I = 1, N
509: IF( S( I ).GT.THR ) THEN
510: CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
511: RANK = RANK + 1
512: ELSE
513: CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
514: END IF
515: 10 CONTINUE
516: *
517: * Multiply B by right singular vectors
518: * (Workspace: need N, prefer N*NRHS)
519: *
520: IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
521: CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
522: $ WORK, LDB )
523: CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
524: ELSE IF( NRHS.GT.1 ) THEN
525: CHUNK = LWORK / N
526: DO 20 I = 1, NRHS, CHUNK
527: BL = MIN( NRHS-I+1, CHUNK )
528: CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
529: $ LDB, ZERO, WORK, N )
530: CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
531: 20 CONTINUE
532: ELSE
533: CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
534: CALL DCOPY( N, WORK, 1, B, 1 )
535: END IF
536: *
537: ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
538: $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
539: *
540: * Path 2a - underdetermined, with many more columns than rows
541: * and sufficient workspace for an efficient algorithm
542: *
543: LDWORK = M
544: IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
545: $ M*LDA+M+M*NRHS ) )LDWORK = LDA
546: ITAU = 1
547: IWORK = M + 1
548: *
549: * Compute A=L*Q
550: * (Workspace: need 2*M, prefer M+M*NB)
551: *
552: CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
553: $ LWORK-IWORK+1, INFO )
554: IL = IWORK
555: *
556: * Copy L to WORK(IL), zeroing out above it
557: *
558: CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
559: CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
560: $ LDWORK )
561: IE = IL + LDWORK*M
562: ITAUQ = IE + M
563: ITAUP = ITAUQ + M
564: IWORK = ITAUP + M
565: *
566: * Bidiagonalize L in WORK(IL)
567: * (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
568: *
569: CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
570: $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
571: $ LWORK-IWORK+1, INFO )
572: *
573: * Multiply B by transpose of left bidiagonalizing vectors of L
574: * (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
575: *
576: CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
577: $ WORK( ITAUQ ), B, LDB, WORK( IWORK ),
578: $ LWORK-IWORK+1, INFO )
579: *
580: * Generate right bidiagonalizing vectors of R in WORK(IL)
581: * (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
582: *
583: CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
584: $ WORK( IWORK ), LWORK-IWORK+1, INFO )
585: IWORK = IE + M
586: *
587: * Perform bidiagonal QR iteration,
588: * computing right singular vectors of L in WORK(IL) and
589: * multiplying B by transpose of left singular vectors
590: * (Workspace: need M*M+M+BDSPAC)
591: *
592: CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
593: $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
594: IF( INFO.NE.0 )
595: $ GO TO 70
596: *
597: * Multiply B by reciprocals of singular values
598: *
599: THR = MAX( RCOND*S( 1 ), SFMIN )
600: IF( RCOND.LT.ZERO )
601: $ THR = MAX( EPS*S( 1 ), SFMIN )
602: RANK = 0
603: DO 30 I = 1, M
604: IF( S( I ).GT.THR ) THEN
605: CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
606: RANK = RANK + 1
607: ELSE
608: CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
609: END IF
610: 30 CONTINUE
611: IWORK = IE
612: *
613: * Multiply B by right singular vectors of L in WORK(IL)
614: * (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
615: *
616: IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
617: CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
618: $ B, LDB, ZERO, WORK( IWORK ), LDB )
619: CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
620: ELSE IF( NRHS.GT.1 ) THEN
621: CHUNK = ( LWORK-IWORK+1 ) / M
622: DO 40 I = 1, NRHS, CHUNK
623: BL = MIN( NRHS-I+1, CHUNK )
624: CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
625: $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
626: CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
627: $ LDB )
628: 40 CONTINUE
629: ELSE
630: CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
631: $ 1, ZERO, WORK( IWORK ), 1 )
632: CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
633: END IF
634: *
635: * Zero out below first M rows of B
636: *
637: CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
638: IWORK = ITAU + M
639: *
640: * Multiply transpose(Q) by B
641: * (Workspace: need M+NRHS, prefer M+NRHS*NB)
642: *
643: CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
644: $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
645: *
646: ELSE
647: *
648: * Path 2 - remaining underdetermined cases
649: *
650: IE = 1
651: ITAUQ = IE + M
652: ITAUP = ITAUQ + M
653: IWORK = ITAUP + M
654: *
655: * Bidiagonalize A
656: * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
657: *
658: CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
659: $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
660: $ INFO )
661: *
662: * Multiply B by transpose of left bidiagonalizing vectors
663: * (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
664: *
665: CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
666: $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
667: *
668: * Generate right bidiagonalizing vectors in A
669: * (Workspace: need 4*M, prefer 3*M+M*NB)
670: *
671: CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
672: $ WORK( IWORK ), LWORK-IWORK+1, INFO )
673: IWORK = IE + M
674: *
675: * Perform bidiagonal QR iteration,
676: * computing right singular vectors of A in A and
677: * multiplying B by transpose of left singular vectors
678: * (Workspace: need BDSPAC)
679: *
1.9 bertrand 680: CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
1.1 bertrand 681: $ 1, B, LDB, WORK( IWORK ), INFO )
682: IF( INFO.NE.0 )
683: $ GO TO 70
684: *
685: * Multiply B by reciprocals of singular values
686: *
687: THR = MAX( RCOND*S( 1 ), SFMIN )
688: IF( RCOND.LT.ZERO )
689: $ THR = MAX( EPS*S( 1 ), SFMIN )
690: RANK = 0
691: DO 50 I = 1, M
692: IF( S( I ).GT.THR ) THEN
693: CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
694: RANK = RANK + 1
695: ELSE
696: CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
697: END IF
698: 50 CONTINUE
699: *
700: * Multiply B by right singular vectors of A
701: * (Workspace: need N, prefer N*NRHS)
702: *
703: IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
704: CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
705: $ WORK, LDB )
706: CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
707: ELSE IF( NRHS.GT.1 ) THEN
708: CHUNK = LWORK / N
709: DO 60 I = 1, NRHS, CHUNK
710: BL = MIN( NRHS-I+1, CHUNK )
711: CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
712: $ LDB, ZERO, WORK, N )
713: CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
714: 60 CONTINUE
715: ELSE
716: CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
717: CALL DCOPY( N, WORK, 1, B, 1 )
718: END IF
719: END IF
720: *
721: * Undo scaling
722: *
723: IF( IASCL.EQ.1 ) THEN
724: CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
725: CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
726: $ INFO )
727: ELSE IF( IASCL.EQ.2 ) THEN
728: CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
729: CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
730: $ INFO )
731: END IF
732: IF( IBSCL.EQ.1 ) THEN
733: CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
734: ELSE IF( IBSCL.EQ.2 ) THEN
735: CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
736: END IF
737: *
738: 70 CONTINUE
739: WORK( 1 ) = MAXWRK
740: RETURN
741: *
742: * End of DGELSS
743: *
744: END
CVSweb interface <joel.bertrand@systella.fr>