1: *> \brief \b DGBTRF
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DGBTRF + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
22: *
23: * .. Scalar Arguments ..
24: * INTEGER INFO, KL, KU, LDAB, M, N
25: * ..
26: * .. Array Arguments ..
27: * INTEGER IPIV( * )
28: * DOUBLE PRECISION AB( LDAB, * )
29: * ..
30: *
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> DGBTRF computes an LU factorization of a real m-by-n band matrix A
38: *> using partial pivoting with row interchanges.
39: *>
40: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
41: *> \endverbatim
42: *
43: * Arguments:
44: * ==========
45: *
46: *> \param[in] M
47: *> \verbatim
48: *> M is INTEGER
49: *> The number of rows of the matrix A. M >= 0.
50: *> \endverbatim
51: *>
52: *> \param[in] N
53: *> \verbatim
54: *> N is INTEGER
55: *> The number of columns of the matrix A. N >= 0.
56: *> \endverbatim
57: *>
58: *> \param[in] KL
59: *> \verbatim
60: *> KL is INTEGER
61: *> The number of subdiagonals within the band of A. KL >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in] KU
65: *> \verbatim
66: *> KU is INTEGER
67: *> The number of superdiagonals within the band of A. KU >= 0.
68: *> \endverbatim
69: *>
70: *> \param[in,out] AB
71: *> \verbatim
72: *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
73: *> On entry, the matrix A in band storage, in rows KL+1 to
74: *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
75: *> The j-th column of A is stored in the j-th column of the
76: *> array AB as follows:
77: *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
78: *>
79: *> On exit, details of the factorization: U is stored as an
80: *> upper triangular band matrix with KL+KU superdiagonals in
81: *> rows 1 to KL+KU+1, and the multipliers used during the
82: *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
83: *> See below for further details.
84: *> \endverbatim
85: *>
86: *> \param[in] LDAB
87: *> \verbatim
88: *> LDAB is INTEGER
89: *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
90: *> \endverbatim
91: *>
92: *> \param[out] IPIV
93: *> \verbatim
94: *> IPIV is INTEGER array, dimension (min(M,N))
95: *> The pivot indices; for 1 <= i <= min(M,N), row i of the
96: *> matrix was interchanged with row IPIV(i).
97: *> \endverbatim
98: *>
99: *> \param[out] INFO
100: *> \verbatim
101: *> INFO is INTEGER
102: *> = 0: successful exit
103: *> < 0: if INFO = -i, the i-th argument had an illegal value
104: *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
105: *> has been completed, but the factor U is exactly
106: *> singular, and division by zero will occur if it is used
107: *> to solve a system of equations.
108: *> \endverbatim
109: *
110: * Authors:
111: * ========
112: *
113: *> \author Univ. of Tennessee
114: *> \author Univ. of California Berkeley
115: *> \author Univ. of Colorado Denver
116: *> \author NAG Ltd.
117: *
118: *> \date November 2011
119: *
120: *> \ingroup doubleGBcomputational
121: *
122: *> \par Further Details:
123: * =====================
124: *>
125: *> \verbatim
126: *>
127: *> The band storage scheme is illustrated by the following example, when
128: *> M = N = 6, KL = 2, KU = 1:
129: *>
130: *> On entry: On exit:
131: *>
132: *> * * * + + + * * * u14 u25 u36
133: *> * * + + + + * * u13 u24 u35 u46
134: *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
135: *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
136: *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
137: *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
138: *>
139: *> Array elements marked * are not used by the routine; elements marked
140: *> + need not be set on entry, but are required by the routine to store
141: *> elements of U because of fill-in resulting from the row interchanges.
142: *> \endverbatim
143: *>
144: * =====================================================================
145: SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
146: *
147: * -- LAPACK computational routine (version 3.4.0) --
148: * -- LAPACK is a software package provided by Univ. of Tennessee, --
149: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
150: * November 2011
151: *
152: * .. Scalar Arguments ..
153: INTEGER INFO, KL, KU, LDAB, M, N
154: * ..
155: * .. Array Arguments ..
156: INTEGER IPIV( * )
157: DOUBLE PRECISION AB( LDAB, * )
158: * ..
159: *
160: * =====================================================================
161: *
162: * .. Parameters ..
163: DOUBLE PRECISION ONE, ZERO
164: PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
165: INTEGER NBMAX, LDWORK
166: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
167: * ..
168: * .. Local Scalars ..
169: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
170: $ JU, K2, KM, KV, NB, NW
171: DOUBLE PRECISION TEMP
172: * ..
173: * .. Local Arrays ..
174: DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
175: $ WORK31( LDWORK, NBMAX )
176: * ..
177: * .. External Functions ..
178: INTEGER IDAMAX, ILAENV
179: EXTERNAL IDAMAX, ILAENV
180: * ..
181: * .. External Subroutines ..
182: EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
183: $ DSWAP, DTRSM, XERBLA
184: * ..
185: * .. Intrinsic Functions ..
186: INTRINSIC MAX, MIN
187: * ..
188: * .. Executable Statements ..
189: *
190: * KV is the number of superdiagonals in the factor U, allowing for
191: * fill-in
192: *
193: KV = KU + KL
194: *
195: * Test the input parameters.
196: *
197: INFO = 0
198: IF( M.LT.0 ) THEN
199: INFO = -1
200: ELSE IF( N.LT.0 ) THEN
201: INFO = -2
202: ELSE IF( KL.LT.0 ) THEN
203: INFO = -3
204: ELSE IF( KU.LT.0 ) THEN
205: INFO = -4
206: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
207: INFO = -6
208: END IF
209: IF( INFO.NE.0 ) THEN
210: CALL XERBLA( 'DGBTRF', -INFO )
211: RETURN
212: END IF
213: *
214: * Quick return if possible
215: *
216: IF( M.EQ.0 .OR. N.EQ.0 )
217: $ RETURN
218: *
219: * Determine the block size for this environment
220: *
221: NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
222: *
223: * The block size must not exceed the limit set by the size of the
224: * local arrays WORK13 and WORK31.
225: *
226: NB = MIN( NB, NBMAX )
227: *
228: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
229: *
230: * Use unblocked code
231: *
232: CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
233: ELSE
234: *
235: * Use blocked code
236: *
237: * Zero the superdiagonal elements of the work array WORK13
238: *
239: DO 20 J = 1, NB
240: DO 10 I = 1, J - 1
241: WORK13( I, J ) = ZERO
242: 10 CONTINUE
243: 20 CONTINUE
244: *
245: * Zero the subdiagonal elements of the work array WORK31
246: *
247: DO 40 J = 1, NB
248: DO 30 I = J + 1, NB
249: WORK31( I, J ) = ZERO
250: 30 CONTINUE
251: 40 CONTINUE
252: *
253: * Gaussian elimination with partial pivoting
254: *
255: * Set fill-in elements in columns KU+2 to KV to zero
256: *
257: DO 60 J = KU + 2, MIN( KV, N )
258: DO 50 I = KV - J + 2, KL
259: AB( I, J ) = ZERO
260: 50 CONTINUE
261: 60 CONTINUE
262: *
263: * JU is the index of the last column affected by the current
264: * stage of the factorization
265: *
266: JU = 1
267: *
268: DO 180 J = 1, MIN( M, N ), NB
269: JB = MIN( NB, MIN( M, N )-J+1 )
270: *
271: * The active part of the matrix is partitioned
272: *
273: * A11 A12 A13
274: * A21 A22 A23
275: * A31 A32 A33
276: *
277: * Here A11, A21 and A31 denote the current block of JB columns
278: * which is about to be factorized. The number of rows in the
279: * partitioning are JB, I2, I3 respectively, and the numbers
280: * of columns are JB, J2, J3. The superdiagonal elements of A13
281: * and the subdiagonal elements of A31 lie outside the band.
282: *
283: I2 = MIN( KL-JB, M-J-JB+1 )
284: I3 = MIN( JB, M-J-KL+1 )
285: *
286: * J2 and J3 are computed after JU has been updated.
287: *
288: * Factorize the current block of JB columns
289: *
290: DO 80 JJ = J, J + JB - 1
291: *
292: * Set fill-in elements in column JJ+KV to zero
293: *
294: IF( JJ+KV.LE.N ) THEN
295: DO 70 I = 1, KL
296: AB( I, JJ+KV ) = ZERO
297: 70 CONTINUE
298: END IF
299: *
300: * Find pivot and test for singularity. KM is the number of
301: * subdiagonal elements in the current column.
302: *
303: KM = MIN( KL, M-JJ )
304: JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
305: IPIV( JJ ) = JP + JJ - J
306: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
307: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
308: IF( JP.NE.1 ) THEN
309: *
310: * Apply interchange to columns J to J+JB-1
311: *
312: IF( JP+JJ-1.LT.J+KL ) THEN
313: *
314: CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
315: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
316: ELSE
317: *
318: * The interchange affects columns J to JJ-1 of A31
319: * which are stored in the work array WORK31
320: *
321: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
322: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
323: CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
324: $ AB( KV+JP, JJ ), LDAB-1 )
325: END IF
326: END IF
327: *
328: * Compute multipliers
329: *
330: CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
331: $ 1 )
332: *
333: * Update trailing submatrix within the band and within
334: * the current block. JM is the index of the last column
335: * which needs to be updated.
336: *
337: JM = MIN( JU, J+JB-1 )
338: IF( JM.GT.JJ )
339: $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
340: $ AB( KV, JJ+1 ), LDAB-1,
341: $ AB( KV+1, JJ+1 ), LDAB-1 )
342: ELSE
343: *
344: * If pivot is zero, set INFO to the index of the pivot
345: * unless a zero pivot has already been found.
346: *
347: IF( INFO.EQ.0 )
348: $ INFO = JJ
349: END IF
350: *
351: * Copy current column of A31 into the work array WORK31
352: *
353: NW = MIN( JJ-J+1, I3 )
354: IF( NW.GT.0 )
355: $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
356: $ WORK31( 1, JJ-J+1 ), 1 )
357: 80 CONTINUE
358: IF( J+JB.LE.N ) THEN
359: *
360: * Apply the row interchanges to the other blocks.
361: *
362: J2 = MIN( JU-J+1, KV ) - JB
363: J3 = MAX( 0, JU-J-KV+1 )
364: *
365: * Use DLASWP to apply the row interchanges to A12, A22, and
366: * A32.
367: *
368: CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
369: $ IPIV( J ), 1 )
370: *
371: * Adjust the pivot indices.
372: *
373: DO 90 I = J, J + JB - 1
374: IPIV( I ) = IPIV( I ) + J - 1
375: 90 CONTINUE
376: *
377: * Apply the row interchanges to A13, A23, and A33
378: * columnwise.
379: *
380: K2 = J - 1 + JB + J2
381: DO 110 I = 1, J3
382: JJ = K2 + I
383: DO 100 II = J + I - 1, J + JB - 1
384: IP = IPIV( II )
385: IF( IP.NE.II ) THEN
386: TEMP = AB( KV+1+II-JJ, JJ )
387: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
388: AB( KV+1+IP-JJ, JJ ) = TEMP
389: END IF
390: 100 CONTINUE
391: 110 CONTINUE
392: *
393: * Update the relevant part of the trailing submatrix
394: *
395: IF( J2.GT.0 ) THEN
396: *
397: * Update A12
398: *
399: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
400: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
401: $ AB( KV+1-JB, J+JB ), LDAB-1 )
402: *
403: IF( I2.GT.0 ) THEN
404: *
405: * Update A22
406: *
407: CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
408: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
409: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
410: $ AB( KV+1, J+JB ), LDAB-1 )
411: END IF
412: *
413: IF( I3.GT.0 ) THEN
414: *
415: * Update A32
416: *
417: CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
418: $ JB, -ONE, WORK31, LDWORK,
419: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
420: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
421: END IF
422: END IF
423: *
424: IF( J3.GT.0 ) THEN
425: *
426: * Copy the lower triangle of A13 into the work array
427: * WORK13
428: *
429: DO 130 JJ = 1, J3
430: DO 120 II = JJ, JB
431: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
432: 120 CONTINUE
433: 130 CONTINUE
434: *
435: * Update A13 in the work array
436: *
437: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
438: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
439: $ WORK13, LDWORK )
440: *
441: IF( I2.GT.0 ) THEN
442: *
443: * Update A23
444: *
445: CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
446: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
447: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
448: $ LDAB-1 )
449: END IF
450: *
451: IF( I3.GT.0 ) THEN
452: *
453: * Update A33
454: *
455: CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
456: $ JB, -ONE, WORK31, LDWORK, WORK13,
457: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
458: END IF
459: *
460: * Copy the lower triangle of A13 back into place
461: *
462: DO 150 JJ = 1, J3
463: DO 140 II = JJ, JB
464: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
465: 140 CONTINUE
466: 150 CONTINUE
467: END IF
468: ELSE
469: *
470: * Adjust the pivot indices.
471: *
472: DO 160 I = J, J + JB - 1
473: IPIV( I ) = IPIV( I ) + J - 1
474: 160 CONTINUE
475: END IF
476: *
477: * Partially undo the interchanges in the current block to
478: * restore the upper triangular form of A31 and copy the upper
479: * triangle of A31 back into place
480: *
481: DO 170 JJ = J + JB - 1, J, -1
482: JP = IPIV( JJ ) - JJ + 1
483: IF( JP.NE.1 ) THEN
484: *
485: * Apply interchange to columns J to JJ-1
486: *
487: IF( JP+JJ-1.LT.J+KL ) THEN
488: *
489: * The interchange does not affect A31
490: *
491: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
492: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
493: ELSE
494: *
495: * The interchange does affect A31
496: *
497: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
498: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
499: END IF
500: END IF
501: *
502: * Copy the current column of A31 back into place
503: *
504: NW = MIN( I3, JJ-J+1 )
505: IF( NW.GT.0 )
506: $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
507: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
508: 170 CONTINUE
509: 180 CONTINUE
510: END IF
511: *
512: RETURN
513: *
514: * End of DGBTRF
515: *
516: END
CVSweb interface <joel.bertrand@systella.fr>