1: *> \brief \b DGBTRF
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DGBTRF + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
22: *
23: * .. Scalar Arguments ..
24: * INTEGER INFO, KL, KU, LDAB, M, N
25: * ..
26: * .. Array Arguments ..
27: * INTEGER IPIV( * )
28: * DOUBLE PRECISION AB( LDAB, * )
29: * ..
30: *
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> DGBTRF computes an LU factorization of a real m-by-n band matrix A
38: *> using partial pivoting with row interchanges.
39: *>
40: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
41: *> \endverbatim
42: *
43: * Arguments:
44: * ==========
45: *
46: *> \param[in] M
47: *> \verbatim
48: *> M is INTEGER
49: *> The number of rows of the matrix A. M >= 0.
50: *> \endverbatim
51: *>
52: *> \param[in] N
53: *> \verbatim
54: *> N is INTEGER
55: *> The number of columns of the matrix A. N >= 0.
56: *> \endverbatim
57: *>
58: *> \param[in] KL
59: *> \verbatim
60: *> KL is INTEGER
61: *> The number of subdiagonals within the band of A. KL >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in] KU
65: *> \verbatim
66: *> KU is INTEGER
67: *> The number of superdiagonals within the band of A. KU >= 0.
68: *> \endverbatim
69: *>
70: *> \param[in,out] AB
71: *> \verbatim
72: *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
73: *> On entry, the matrix A in band storage, in rows KL+1 to
74: *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
75: *> The j-th column of A is stored in the j-th column of the
76: *> array AB as follows:
77: *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
78: *>
79: *> On exit, details of the factorization: U is stored as an
80: *> upper triangular band matrix with KL+KU superdiagonals in
81: *> rows 1 to KL+KU+1, and the multipliers used during the
82: *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
83: *> See below for further details.
84: *> \endverbatim
85: *>
86: *> \param[in] LDAB
87: *> \verbatim
88: *> LDAB is INTEGER
89: *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
90: *> \endverbatim
91: *>
92: *> \param[out] IPIV
93: *> \verbatim
94: *> IPIV is INTEGER array, dimension (min(M,N))
95: *> The pivot indices; for 1 <= i <= min(M,N), row i of the
96: *> matrix was interchanged with row IPIV(i).
97: *> \endverbatim
98: *>
99: *> \param[out] INFO
100: *> \verbatim
101: *> INFO is INTEGER
102: *> = 0: successful exit
103: *> < 0: if INFO = -i, the i-th argument had an illegal value
104: *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
105: *> has been completed, but the factor U is exactly
106: *> singular, and division by zero will occur if it is used
107: *> to solve a system of equations.
108: *> \endverbatim
109: *
110: * Authors:
111: * ========
112: *
113: *> \author Univ. of Tennessee
114: *> \author Univ. of California Berkeley
115: *> \author Univ. of Colorado Denver
116: *> \author NAG Ltd.
117: *
118: *> \ingroup doubleGBcomputational
119: *
120: *> \par Further Details:
121: * =====================
122: *>
123: *> \verbatim
124: *>
125: *> The band storage scheme is illustrated by the following example, when
126: *> M = N = 6, KL = 2, KU = 1:
127: *>
128: *> On entry: On exit:
129: *>
130: *> * * * + + + * * * u14 u25 u36
131: *> * * + + + + * * u13 u24 u35 u46
132: *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
133: *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
134: *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
135: *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
136: *>
137: *> Array elements marked * are not used by the routine; elements marked
138: *> + need not be set on entry, but are required by the routine to store
139: *> elements of U because of fill-in resulting from the row interchanges.
140: *> \endverbatim
141: *>
142: * =====================================================================
143: SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
144: *
145: * -- LAPACK computational routine --
146: * -- LAPACK is a software package provided by Univ. of Tennessee, --
147: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
148: *
149: * .. Scalar Arguments ..
150: INTEGER INFO, KL, KU, LDAB, M, N
151: * ..
152: * .. Array Arguments ..
153: INTEGER IPIV( * )
154: DOUBLE PRECISION AB( LDAB, * )
155: * ..
156: *
157: * =====================================================================
158: *
159: * .. Parameters ..
160: DOUBLE PRECISION ONE, ZERO
161: PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
162: INTEGER NBMAX, LDWORK
163: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
164: * ..
165: * .. Local Scalars ..
166: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
167: $ JU, K2, KM, KV, NB, NW
168: DOUBLE PRECISION TEMP
169: * ..
170: * .. Local Arrays ..
171: DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
172: $ WORK31( LDWORK, NBMAX )
173: * ..
174: * .. External Functions ..
175: INTEGER IDAMAX, ILAENV
176: EXTERNAL IDAMAX, ILAENV
177: * ..
178: * .. External Subroutines ..
179: EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
180: $ DSWAP, DTRSM, XERBLA
181: * ..
182: * .. Intrinsic Functions ..
183: INTRINSIC MAX, MIN
184: * ..
185: * .. Executable Statements ..
186: *
187: * KV is the number of superdiagonals in the factor U, allowing for
188: * fill-in
189: *
190: KV = KU + KL
191: *
192: * Test the input parameters.
193: *
194: INFO = 0
195: IF( M.LT.0 ) THEN
196: INFO = -1
197: ELSE IF( N.LT.0 ) THEN
198: INFO = -2
199: ELSE IF( KL.LT.0 ) THEN
200: INFO = -3
201: ELSE IF( KU.LT.0 ) THEN
202: INFO = -4
203: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
204: INFO = -6
205: END IF
206: IF( INFO.NE.0 ) THEN
207: CALL XERBLA( 'DGBTRF', -INFO )
208: RETURN
209: END IF
210: *
211: * Quick return if possible
212: *
213: IF( M.EQ.0 .OR. N.EQ.0 )
214: $ RETURN
215: *
216: * Determine the block size for this environment
217: *
218: NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
219: *
220: * The block size must not exceed the limit set by the size of the
221: * local arrays WORK13 and WORK31.
222: *
223: NB = MIN( NB, NBMAX )
224: *
225: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
226: *
227: * Use unblocked code
228: *
229: CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
230: ELSE
231: *
232: * Use blocked code
233: *
234: * Zero the superdiagonal elements of the work array WORK13
235: *
236: DO 20 J = 1, NB
237: DO 10 I = 1, J - 1
238: WORK13( I, J ) = ZERO
239: 10 CONTINUE
240: 20 CONTINUE
241: *
242: * Zero the subdiagonal elements of the work array WORK31
243: *
244: DO 40 J = 1, NB
245: DO 30 I = J + 1, NB
246: WORK31( I, J ) = ZERO
247: 30 CONTINUE
248: 40 CONTINUE
249: *
250: * Gaussian elimination with partial pivoting
251: *
252: * Set fill-in elements in columns KU+2 to KV to zero
253: *
254: DO 60 J = KU + 2, MIN( KV, N )
255: DO 50 I = KV - J + 2, KL
256: AB( I, J ) = ZERO
257: 50 CONTINUE
258: 60 CONTINUE
259: *
260: * JU is the index of the last column affected by the current
261: * stage of the factorization
262: *
263: JU = 1
264: *
265: DO 180 J = 1, MIN( M, N ), NB
266: JB = MIN( NB, MIN( M, N )-J+1 )
267: *
268: * The active part of the matrix is partitioned
269: *
270: * A11 A12 A13
271: * A21 A22 A23
272: * A31 A32 A33
273: *
274: * Here A11, A21 and A31 denote the current block of JB columns
275: * which is about to be factorized. The number of rows in the
276: * partitioning are JB, I2, I3 respectively, and the numbers
277: * of columns are JB, J2, J3. The superdiagonal elements of A13
278: * and the subdiagonal elements of A31 lie outside the band.
279: *
280: I2 = MIN( KL-JB, M-J-JB+1 )
281: I3 = MIN( JB, M-J-KL+1 )
282: *
283: * J2 and J3 are computed after JU has been updated.
284: *
285: * Factorize the current block of JB columns
286: *
287: DO 80 JJ = J, J + JB - 1
288: *
289: * Set fill-in elements in column JJ+KV to zero
290: *
291: IF( JJ+KV.LE.N ) THEN
292: DO 70 I = 1, KL
293: AB( I, JJ+KV ) = ZERO
294: 70 CONTINUE
295: END IF
296: *
297: * Find pivot and test for singularity. KM is the number of
298: * subdiagonal elements in the current column.
299: *
300: KM = MIN( KL, M-JJ )
301: JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
302: IPIV( JJ ) = JP + JJ - J
303: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
304: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
305: IF( JP.NE.1 ) THEN
306: *
307: * Apply interchange to columns J to J+JB-1
308: *
309: IF( JP+JJ-1.LT.J+KL ) THEN
310: *
311: CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
312: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
313: ELSE
314: *
315: * The interchange affects columns J to JJ-1 of A31
316: * which are stored in the work array WORK31
317: *
318: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
319: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
320: CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
321: $ AB( KV+JP, JJ ), LDAB-1 )
322: END IF
323: END IF
324: *
325: * Compute multipliers
326: *
327: CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
328: $ 1 )
329: *
330: * Update trailing submatrix within the band and within
331: * the current block. JM is the index of the last column
332: * which needs to be updated.
333: *
334: JM = MIN( JU, J+JB-1 )
335: IF( JM.GT.JJ )
336: $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
337: $ AB( KV, JJ+1 ), LDAB-1,
338: $ AB( KV+1, JJ+1 ), LDAB-1 )
339: ELSE
340: *
341: * If pivot is zero, set INFO to the index of the pivot
342: * unless a zero pivot has already been found.
343: *
344: IF( INFO.EQ.0 )
345: $ INFO = JJ
346: END IF
347: *
348: * Copy current column of A31 into the work array WORK31
349: *
350: NW = MIN( JJ-J+1, I3 )
351: IF( NW.GT.0 )
352: $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
353: $ WORK31( 1, JJ-J+1 ), 1 )
354: 80 CONTINUE
355: IF( J+JB.LE.N ) THEN
356: *
357: * Apply the row interchanges to the other blocks.
358: *
359: J2 = MIN( JU-J+1, KV ) - JB
360: J3 = MAX( 0, JU-J-KV+1 )
361: *
362: * Use DLASWP to apply the row interchanges to A12, A22, and
363: * A32.
364: *
365: CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
366: $ IPIV( J ), 1 )
367: *
368: * Adjust the pivot indices.
369: *
370: DO 90 I = J, J + JB - 1
371: IPIV( I ) = IPIV( I ) + J - 1
372: 90 CONTINUE
373: *
374: * Apply the row interchanges to A13, A23, and A33
375: * columnwise.
376: *
377: K2 = J - 1 + JB + J2
378: DO 110 I = 1, J3
379: JJ = K2 + I
380: DO 100 II = J + I - 1, J + JB - 1
381: IP = IPIV( II )
382: IF( IP.NE.II ) THEN
383: TEMP = AB( KV+1+II-JJ, JJ )
384: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
385: AB( KV+1+IP-JJ, JJ ) = TEMP
386: END IF
387: 100 CONTINUE
388: 110 CONTINUE
389: *
390: * Update the relevant part of the trailing submatrix
391: *
392: IF( J2.GT.0 ) THEN
393: *
394: * Update A12
395: *
396: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
397: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
398: $ AB( KV+1-JB, J+JB ), LDAB-1 )
399: *
400: IF( I2.GT.0 ) THEN
401: *
402: * Update A22
403: *
404: CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
405: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
406: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
407: $ AB( KV+1, J+JB ), LDAB-1 )
408: END IF
409: *
410: IF( I3.GT.0 ) THEN
411: *
412: * Update A32
413: *
414: CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
415: $ JB, -ONE, WORK31, LDWORK,
416: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
417: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
418: END IF
419: END IF
420: *
421: IF( J3.GT.0 ) THEN
422: *
423: * Copy the lower triangle of A13 into the work array
424: * WORK13
425: *
426: DO 130 JJ = 1, J3
427: DO 120 II = JJ, JB
428: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
429: 120 CONTINUE
430: 130 CONTINUE
431: *
432: * Update A13 in the work array
433: *
434: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
435: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
436: $ WORK13, LDWORK )
437: *
438: IF( I2.GT.0 ) THEN
439: *
440: * Update A23
441: *
442: CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
443: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
444: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
445: $ LDAB-1 )
446: END IF
447: *
448: IF( I3.GT.0 ) THEN
449: *
450: * Update A33
451: *
452: CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
453: $ JB, -ONE, WORK31, LDWORK, WORK13,
454: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
455: END IF
456: *
457: * Copy the lower triangle of A13 back into place
458: *
459: DO 150 JJ = 1, J3
460: DO 140 II = JJ, JB
461: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
462: 140 CONTINUE
463: 150 CONTINUE
464: END IF
465: ELSE
466: *
467: * Adjust the pivot indices.
468: *
469: DO 160 I = J, J + JB - 1
470: IPIV( I ) = IPIV( I ) + J - 1
471: 160 CONTINUE
472: END IF
473: *
474: * Partially undo the interchanges in the current block to
475: * restore the upper triangular form of A31 and copy the upper
476: * triangle of A31 back into place
477: *
478: DO 170 JJ = J + JB - 1, J, -1
479: JP = IPIV( JJ ) - JJ + 1
480: IF( JP.NE.1 ) THEN
481: *
482: * Apply interchange to columns J to JJ-1
483: *
484: IF( JP+JJ-1.LT.J+KL ) THEN
485: *
486: * The interchange does not affect A31
487: *
488: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
489: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
490: ELSE
491: *
492: * The interchange does affect A31
493: *
494: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
495: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
496: END IF
497: END IF
498: *
499: * Copy the current column of A31 back into place
500: *
501: NW = MIN( I3, JJ-J+1 )
502: IF( NW.GT.0 )
503: $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
504: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
505: 170 CONTINUE
506: 180 CONTINUE
507: END IF
508: *
509: RETURN
510: *
511: * End of DGBTRF
512: *
513: END
CVSweb interface <joel.bertrand@systella.fr>