1: SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
2: *
3: * -- LAPACK routine (version 3.2) --
4: * -- LAPACK is a software package provided by Univ. of Tennessee, --
5: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
6: * November 2006
7: *
8: * .. Scalar Arguments ..
9: INTEGER INFO, KL, KU, LDAB, M, N
10: * ..
11: * .. Array Arguments ..
12: INTEGER IPIV( * )
13: DOUBLE PRECISION AB( LDAB, * )
14: * ..
15: *
16: * Purpose
17: * =======
18: *
19: * DGBTRF computes an LU factorization of a real m-by-n band matrix A
20: * using partial pivoting with row interchanges.
21: *
22: * This is the blocked version of the algorithm, calling Level 3 BLAS.
23: *
24: * Arguments
25: * =========
26: *
27: * M (input) INTEGER
28: * The number of rows of the matrix A. M >= 0.
29: *
30: * N (input) INTEGER
31: * The number of columns of the matrix A. N >= 0.
32: *
33: * KL (input) INTEGER
34: * The number of subdiagonals within the band of A. KL >= 0.
35: *
36: * KU (input) INTEGER
37: * The number of superdiagonals within the band of A. KU >= 0.
38: *
39: * AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
40: * On entry, the matrix A in band storage, in rows KL+1 to
41: * 2*KL+KU+1; rows 1 to KL of the array need not be set.
42: * The j-th column of A is stored in the j-th column of the
43: * array AB as follows:
44: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
45: *
46: * On exit, details of the factorization: U is stored as an
47: * upper triangular band matrix with KL+KU superdiagonals in
48: * rows 1 to KL+KU+1, and the multipliers used during the
49: * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
50: * See below for further details.
51: *
52: * LDAB (input) INTEGER
53: * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
54: *
55: * IPIV (output) INTEGER array, dimension (min(M,N))
56: * The pivot indices; for 1 <= i <= min(M,N), row i of the
57: * matrix was interchanged with row IPIV(i).
58: *
59: * INFO (output) INTEGER
60: * = 0: successful exit
61: * < 0: if INFO = -i, the i-th argument had an illegal value
62: * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
63: * has been completed, but the factor U is exactly
64: * singular, and division by zero will occur if it is used
65: * to solve a system of equations.
66: *
67: * Further Details
68: * ===============
69: *
70: * The band storage scheme is illustrated by the following example, when
71: * M = N = 6, KL = 2, KU = 1:
72: *
73: * On entry: On exit:
74: *
75: * * * * + + + * * * u14 u25 u36
76: * * * + + + + * * u13 u24 u35 u46
77: * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
78: * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
79: * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
80: * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
81: *
82: * Array elements marked * are not used by the routine; elements marked
83: * + need not be set on entry, but are required by the routine to store
84: * elements of U because of fill-in resulting from the row interchanges.
85: *
86: * =====================================================================
87: *
88: * .. Parameters ..
89: DOUBLE PRECISION ONE, ZERO
90: PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
91: INTEGER NBMAX, LDWORK
92: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
93: * ..
94: * .. Local Scalars ..
95: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
96: $ JU, K2, KM, KV, NB, NW
97: DOUBLE PRECISION TEMP
98: * ..
99: * .. Local Arrays ..
100: DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
101: $ WORK31( LDWORK, NBMAX )
102: * ..
103: * .. External Functions ..
104: INTEGER IDAMAX, ILAENV
105: EXTERNAL IDAMAX, ILAENV
106: * ..
107: * .. External Subroutines ..
108: EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
109: $ DSWAP, DTRSM, XERBLA
110: * ..
111: * .. Intrinsic Functions ..
112: INTRINSIC MAX, MIN
113: * ..
114: * .. Executable Statements ..
115: *
116: * KV is the number of superdiagonals in the factor U, allowing for
117: * fill-in
118: *
119: KV = KU + KL
120: *
121: * Test the input parameters.
122: *
123: INFO = 0
124: IF( M.LT.0 ) THEN
125: INFO = -1
126: ELSE IF( N.LT.0 ) THEN
127: INFO = -2
128: ELSE IF( KL.LT.0 ) THEN
129: INFO = -3
130: ELSE IF( KU.LT.0 ) THEN
131: INFO = -4
132: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
133: INFO = -6
134: END IF
135: IF( INFO.NE.0 ) THEN
136: CALL XERBLA( 'DGBTRF', -INFO )
137: RETURN
138: END IF
139: *
140: * Quick return if possible
141: *
142: IF( M.EQ.0 .OR. N.EQ.0 )
143: $ RETURN
144: *
145: * Determine the block size for this environment
146: *
147: NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
148: *
149: * The block size must not exceed the limit set by the size of the
150: * local arrays WORK13 and WORK31.
151: *
152: NB = MIN( NB, NBMAX )
153: *
154: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
155: *
156: * Use unblocked code
157: *
158: CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
159: ELSE
160: *
161: * Use blocked code
162: *
163: * Zero the superdiagonal elements of the work array WORK13
164: *
165: DO 20 J = 1, NB
166: DO 10 I = 1, J - 1
167: WORK13( I, J ) = ZERO
168: 10 CONTINUE
169: 20 CONTINUE
170: *
171: * Zero the subdiagonal elements of the work array WORK31
172: *
173: DO 40 J = 1, NB
174: DO 30 I = J + 1, NB
175: WORK31( I, J ) = ZERO
176: 30 CONTINUE
177: 40 CONTINUE
178: *
179: * Gaussian elimination with partial pivoting
180: *
181: * Set fill-in elements in columns KU+2 to KV to zero
182: *
183: DO 60 J = KU + 2, MIN( KV, N )
184: DO 50 I = KV - J + 2, KL
185: AB( I, J ) = ZERO
186: 50 CONTINUE
187: 60 CONTINUE
188: *
189: * JU is the index of the last column affected by the current
190: * stage of the factorization
191: *
192: JU = 1
193: *
194: DO 180 J = 1, MIN( M, N ), NB
195: JB = MIN( NB, MIN( M, N )-J+1 )
196: *
197: * The active part of the matrix is partitioned
198: *
199: * A11 A12 A13
200: * A21 A22 A23
201: * A31 A32 A33
202: *
203: * Here A11, A21 and A31 denote the current block of JB columns
204: * which is about to be factorized. The number of rows in the
205: * partitioning are JB, I2, I3 respectively, and the numbers
206: * of columns are JB, J2, J3. The superdiagonal elements of A13
207: * and the subdiagonal elements of A31 lie outside the band.
208: *
209: I2 = MIN( KL-JB, M-J-JB+1 )
210: I3 = MIN( JB, M-J-KL+1 )
211: *
212: * J2 and J3 are computed after JU has been updated.
213: *
214: * Factorize the current block of JB columns
215: *
216: DO 80 JJ = J, J + JB - 1
217: *
218: * Set fill-in elements in column JJ+KV to zero
219: *
220: IF( JJ+KV.LE.N ) THEN
221: DO 70 I = 1, KL
222: AB( I, JJ+KV ) = ZERO
223: 70 CONTINUE
224: END IF
225: *
226: * Find pivot and test for singularity. KM is the number of
227: * subdiagonal elements in the current column.
228: *
229: KM = MIN( KL, M-JJ )
230: JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
231: IPIV( JJ ) = JP + JJ - J
232: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
233: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
234: IF( JP.NE.1 ) THEN
235: *
236: * Apply interchange to columns J to J+JB-1
237: *
238: IF( JP+JJ-1.LT.J+KL ) THEN
239: *
240: CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
241: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
242: ELSE
243: *
244: * The interchange affects columns J to JJ-1 of A31
245: * which are stored in the work array WORK31
246: *
247: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
248: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
249: CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
250: $ AB( KV+JP, JJ ), LDAB-1 )
251: END IF
252: END IF
253: *
254: * Compute multipliers
255: *
256: CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
257: $ 1 )
258: *
259: * Update trailing submatrix within the band and within
260: * the current block. JM is the index of the last column
261: * which needs to be updated.
262: *
263: JM = MIN( JU, J+JB-1 )
264: IF( JM.GT.JJ )
265: $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
266: $ AB( KV, JJ+1 ), LDAB-1,
267: $ AB( KV+1, JJ+1 ), LDAB-1 )
268: ELSE
269: *
270: * If pivot is zero, set INFO to the index of the pivot
271: * unless a zero pivot has already been found.
272: *
273: IF( INFO.EQ.0 )
274: $ INFO = JJ
275: END IF
276: *
277: * Copy current column of A31 into the work array WORK31
278: *
279: NW = MIN( JJ-J+1, I3 )
280: IF( NW.GT.0 )
281: $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
282: $ WORK31( 1, JJ-J+1 ), 1 )
283: 80 CONTINUE
284: IF( J+JB.LE.N ) THEN
285: *
286: * Apply the row interchanges to the other blocks.
287: *
288: J2 = MIN( JU-J+1, KV ) - JB
289: J3 = MAX( 0, JU-J-KV+1 )
290: *
291: * Use DLASWP to apply the row interchanges to A12, A22, and
292: * A32.
293: *
294: CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
295: $ IPIV( J ), 1 )
296: *
297: * Adjust the pivot indices.
298: *
299: DO 90 I = J, J + JB - 1
300: IPIV( I ) = IPIV( I ) + J - 1
301: 90 CONTINUE
302: *
303: * Apply the row interchanges to A13, A23, and A33
304: * columnwise.
305: *
306: K2 = J - 1 + JB + J2
307: DO 110 I = 1, J3
308: JJ = K2 + I
309: DO 100 II = J + I - 1, J + JB - 1
310: IP = IPIV( II )
311: IF( IP.NE.II ) THEN
312: TEMP = AB( KV+1+II-JJ, JJ )
313: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
314: AB( KV+1+IP-JJ, JJ ) = TEMP
315: END IF
316: 100 CONTINUE
317: 110 CONTINUE
318: *
319: * Update the relevant part of the trailing submatrix
320: *
321: IF( J2.GT.0 ) THEN
322: *
323: * Update A12
324: *
325: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
326: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
327: $ AB( KV+1-JB, J+JB ), LDAB-1 )
328: *
329: IF( I2.GT.0 ) THEN
330: *
331: * Update A22
332: *
333: CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
334: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
335: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
336: $ AB( KV+1, J+JB ), LDAB-1 )
337: END IF
338: *
339: IF( I3.GT.0 ) THEN
340: *
341: * Update A32
342: *
343: CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
344: $ JB, -ONE, WORK31, LDWORK,
345: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
346: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
347: END IF
348: END IF
349: *
350: IF( J3.GT.0 ) THEN
351: *
352: * Copy the lower triangle of A13 into the work array
353: * WORK13
354: *
355: DO 130 JJ = 1, J3
356: DO 120 II = JJ, JB
357: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
358: 120 CONTINUE
359: 130 CONTINUE
360: *
361: * Update A13 in the work array
362: *
363: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
364: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
365: $ WORK13, LDWORK )
366: *
367: IF( I2.GT.0 ) THEN
368: *
369: * Update A23
370: *
371: CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
372: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
373: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
374: $ LDAB-1 )
375: END IF
376: *
377: IF( I3.GT.0 ) THEN
378: *
379: * Update A33
380: *
381: CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
382: $ JB, -ONE, WORK31, LDWORK, WORK13,
383: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
384: END IF
385: *
386: * Copy the lower triangle of A13 back into place
387: *
388: DO 150 JJ = 1, J3
389: DO 140 II = JJ, JB
390: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
391: 140 CONTINUE
392: 150 CONTINUE
393: END IF
394: ELSE
395: *
396: * Adjust the pivot indices.
397: *
398: DO 160 I = J, J + JB - 1
399: IPIV( I ) = IPIV( I ) + J - 1
400: 160 CONTINUE
401: END IF
402: *
403: * Partially undo the interchanges in the current block to
404: * restore the upper triangular form of A31 and copy the upper
405: * triangle of A31 back into place
406: *
407: DO 170 JJ = J + JB - 1, J, -1
408: JP = IPIV( JJ ) - JJ + 1
409: IF( JP.NE.1 ) THEN
410: *
411: * Apply interchange to columns J to JJ-1
412: *
413: IF( JP+JJ-1.LT.J+KL ) THEN
414: *
415: * The interchange does not affect A31
416: *
417: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
418: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
419: ELSE
420: *
421: * The interchange does affect A31
422: *
423: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
424: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
425: END IF
426: END IF
427: *
428: * Copy the current column of A31 back into place
429: *
430: NW = MIN( I3, JJ-J+1 )
431: IF( NW.GT.0 )
432: $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
433: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
434: 170 CONTINUE
435: 180 CONTINUE
436: END IF
437: *
438: RETURN
439: *
440: * End of DGBTRF
441: *
442: END
CVSweb interface <joel.bertrand@systella.fr>