File:
[local] /
rpl /
lapack /
lapack /
zgbtrf.f
Revision
1.7:
download - view:
text,
annotated -
select for diffs -
revision graph
Tue Dec 21 13:53:42 2010 UTC (13 years, 9 months ago) by
bertrand
Branches:
MAIN
CVS tags:
rpl-4_1_3,
rpl-4_1_2,
rpl-4_1_1,
rpl-4_1_0,
rpl-4_0_24,
rpl-4_0_22,
rpl-4_0_21,
rpl-4_0_20,
rpl-4_0,
HEAD
Mise à jour de lapack vers la version 3.3.0.
1: SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
2: *
3: * -- LAPACK routine (version 3.2) --
4: * -- LAPACK is a software package provided by Univ. of Tennessee, --
5: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
6: * November 2006
7: *
8: * .. Scalar Arguments ..
9: INTEGER INFO, KL, KU, LDAB, M, N
10: * ..
11: * .. Array Arguments ..
12: INTEGER IPIV( * )
13: COMPLEX*16 AB( LDAB, * )
14: * ..
15: *
16: * Purpose
17: * =======
18: *
19: * ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
20: * using partial pivoting with row interchanges.
21: *
22: * This is the blocked version of the algorithm, calling Level 3 BLAS.
23: *
24: * Arguments
25: * =========
26: *
27: * M (input) INTEGER
28: * The number of rows of the matrix A. M >= 0.
29: *
30: * N (input) INTEGER
31: * The number of columns of the matrix A. N >= 0.
32: *
33: * KL (input) INTEGER
34: * The number of subdiagonals within the band of A. KL >= 0.
35: *
36: * KU (input) INTEGER
37: * The number of superdiagonals within the band of A. KU >= 0.
38: *
39: * AB (input/output) COMPLEX*16 array, dimension (LDAB,N)
40: * On entry, the matrix A in band storage, in rows KL+1 to
41: * 2*KL+KU+1; rows 1 to KL of the array need not be set.
42: * The j-th column of A is stored in the j-th column of the
43: * array AB as follows:
44: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
45: *
46: * On exit, details of the factorization: U is stored as an
47: * upper triangular band matrix with KL+KU superdiagonals in
48: * rows 1 to KL+KU+1, and the multipliers used during the
49: * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
50: * See below for further details.
51: *
52: * LDAB (input) INTEGER
53: * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
54: *
55: * IPIV (output) INTEGER array, dimension (min(M,N))
56: * The pivot indices; for 1 <= i <= min(M,N), row i of the
57: * matrix was interchanged with row IPIV(i).
58: *
59: * INFO (output) INTEGER
60: * = 0: successful exit
61: * < 0: if INFO = -i, the i-th argument had an illegal value
62: * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
63: * has been completed, but the factor U is exactly
64: * singular, and division by zero will occur if it is used
65: * to solve a system of equations.
66: *
67: * Further Details
68: * ===============
69: *
70: * The band storage scheme is illustrated by the following example, when
71: * M = N = 6, KL = 2, KU = 1:
72: *
73: * On entry: On exit:
74: *
75: * * * * + + + * * * u14 u25 u36
76: * * * + + + + * * u13 u24 u35 u46
77: * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
78: * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
79: * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
80: * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
81: *
82: * Array elements marked * are not used by the routine; elements marked
83: * + need not be set on entry, but are required by the routine to store
84: * elements of U because of fill-in resulting from the row interchanges.
85: *
86: * =====================================================================
87: *
88: * .. Parameters ..
89: COMPLEX*16 ONE, ZERO
90: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
91: $ ZERO = ( 0.0D+0, 0.0D+0 ) )
92: INTEGER NBMAX, LDWORK
93: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
94: * ..
95: * .. Local Scalars ..
96: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
97: $ JU, K2, KM, KV, NB, NW
98: COMPLEX*16 TEMP
99: * ..
100: * .. Local Arrays ..
101: COMPLEX*16 WORK13( LDWORK, NBMAX ),
102: $ WORK31( LDWORK, NBMAX )
103: * ..
104: * .. External Functions ..
105: INTEGER ILAENV, IZAMAX
106: EXTERNAL ILAENV, IZAMAX
107: * ..
108: * .. External Subroutines ..
109: EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
110: $ ZSCAL, ZSWAP, ZTRSM
111: * ..
112: * .. Intrinsic Functions ..
113: INTRINSIC MAX, MIN
114: * ..
115: * .. Executable Statements ..
116: *
117: * KV is the number of superdiagonals in the factor U, allowing for
118: * fill-in
119: *
120: KV = KU + KL
121: *
122: * Test the input parameters.
123: *
124: INFO = 0
125: IF( M.LT.0 ) THEN
126: INFO = -1
127: ELSE IF( N.LT.0 ) THEN
128: INFO = -2
129: ELSE IF( KL.LT.0 ) THEN
130: INFO = -3
131: ELSE IF( KU.LT.0 ) THEN
132: INFO = -4
133: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
134: INFO = -6
135: END IF
136: IF( INFO.NE.0 ) THEN
137: CALL XERBLA( 'ZGBTRF', -INFO )
138: RETURN
139: END IF
140: *
141: * Quick return if possible
142: *
143: IF( M.EQ.0 .OR. N.EQ.0 )
144: $ RETURN
145: *
146: * Determine the block size for this environment
147: *
148: NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
149: *
150: * The block size must not exceed the limit set by the size of the
151: * local arrays WORK13 and WORK31.
152: *
153: NB = MIN( NB, NBMAX )
154: *
155: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
156: *
157: * Use unblocked code
158: *
159: CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
160: ELSE
161: *
162: * Use blocked code
163: *
164: * Zero the superdiagonal elements of the work array WORK13
165: *
166: DO 20 J = 1, NB
167: DO 10 I = 1, J - 1
168: WORK13( I, J ) = ZERO
169: 10 CONTINUE
170: 20 CONTINUE
171: *
172: * Zero the subdiagonal elements of the work array WORK31
173: *
174: DO 40 J = 1, NB
175: DO 30 I = J + 1, NB
176: WORK31( I, J ) = ZERO
177: 30 CONTINUE
178: 40 CONTINUE
179: *
180: * Gaussian elimination with partial pivoting
181: *
182: * Set fill-in elements in columns KU+2 to KV to zero
183: *
184: DO 60 J = KU + 2, MIN( KV, N )
185: DO 50 I = KV - J + 2, KL
186: AB( I, J ) = ZERO
187: 50 CONTINUE
188: 60 CONTINUE
189: *
190: * JU is the index of the last column affected by the current
191: * stage of the factorization
192: *
193: JU = 1
194: *
195: DO 180 J = 1, MIN( M, N ), NB
196: JB = MIN( NB, MIN( M, N )-J+1 )
197: *
198: * The active part of the matrix is partitioned
199: *
200: * A11 A12 A13
201: * A21 A22 A23
202: * A31 A32 A33
203: *
204: * Here A11, A21 and A31 denote the current block of JB columns
205: * which is about to be factorized. The number of rows in the
206: * partitioning are JB, I2, I3 respectively, and the numbers
207: * of columns are JB, J2, J3. The superdiagonal elements of A13
208: * and the subdiagonal elements of A31 lie outside the band.
209: *
210: I2 = MIN( KL-JB, M-J-JB+1 )
211: I3 = MIN( JB, M-J-KL+1 )
212: *
213: * J2 and J3 are computed after JU has been updated.
214: *
215: * Factorize the current block of JB columns
216: *
217: DO 80 JJ = J, J + JB - 1
218: *
219: * Set fill-in elements in column JJ+KV to zero
220: *
221: IF( JJ+KV.LE.N ) THEN
222: DO 70 I = 1, KL
223: AB( I, JJ+KV ) = ZERO
224: 70 CONTINUE
225: END IF
226: *
227: * Find pivot and test for singularity. KM is the number of
228: * subdiagonal elements in the current column.
229: *
230: KM = MIN( KL, M-JJ )
231: JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
232: IPIV( JJ ) = JP + JJ - J
233: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
234: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
235: IF( JP.NE.1 ) THEN
236: *
237: * Apply interchange to columns J to J+JB-1
238: *
239: IF( JP+JJ-1.LT.J+KL ) THEN
240: *
241: CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
242: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
243: ELSE
244: *
245: * The interchange affects columns J to JJ-1 of A31
246: * which are stored in the work array WORK31
247: *
248: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
249: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
250: CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
251: $ AB( KV+JP, JJ ), LDAB-1 )
252: END IF
253: END IF
254: *
255: * Compute multipliers
256: *
257: CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
258: $ 1 )
259: *
260: * Update trailing submatrix within the band and within
261: * the current block. JM is the index of the last column
262: * which needs to be updated.
263: *
264: JM = MIN( JU, J+JB-1 )
265: IF( JM.GT.JJ )
266: $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
267: $ AB( KV, JJ+1 ), LDAB-1,
268: $ AB( KV+1, JJ+1 ), LDAB-1 )
269: ELSE
270: *
271: * If pivot is zero, set INFO to the index of the pivot
272: * unless a zero pivot has already been found.
273: *
274: IF( INFO.EQ.0 )
275: $ INFO = JJ
276: END IF
277: *
278: * Copy current column of A31 into the work array WORK31
279: *
280: NW = MIN( JJ-J+1, I3 )
281: IF( NW.GT.0 )
282: $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
283: $ WORK31( 1, JJ-J+1 ), 1 )
284: 80 CONTINUE
285: IF( J+JB.LE.N ) THEN
286: *
287: * Apply the row interchanges to the other blocks.
288: *
289: J2 = MIN( JU-J+1, KV ) - JB
290: J3 = MAX( 0, JU-J-KV+1 )
291: *
292: * Use ZLASWP to apply the row interchanges to A12, A22, and
293: * A32.
294: *
295: CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
296: $ IPIV( J ), 1 )
297: *
298: * Adjust the pivot indices.
299: *
300: DO 90 I = J, J + JB - 1
301: IPIV( I ) = IPIV( I ) + J - 1
302: 90 CONTINUE
303: *
304: * Apply the row interchanges to A13, A23, and A33
305: * columnwise.
306: *
307: K2 = J - 1 + JB + J2
308: DO 110 I = 1, J3
309: JJ = K2 + I
310: DO 100 II = J + I - 1, J + JB - 1
311: IP = IPIV( II )
312: IF( IP.NE.II ) THEN
313: TEMP = AB( KV+1+II-JJ, JJ )
314: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
315: AB( KV+1+IP-JJ, JJ ) = TEMP
316: END IF
317: 100 CONTINUE
318: 110 CONTINUE
319: *
320: * Update the relevant part of the trailing submatrix
321: *
322: IF( J2.GT.0 ) THEN
323: *
324: * Update A12
325: *
326: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
327: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
328: $ AB( KV+1-JB, J+JB ), LDAB-1 )
329: *
330: IF( I2.GT.0 ) THEN
331: *
332: * Update A22
333: *
334: CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
335: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
336: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
337: $ AB( KV+1, J+JB ), LDAB-1 )
338: END IF
339: *
340: IF( I3.GT.0 ) THEN
341: *
342: * Update A32
343: *
344: CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
345: $ JB, -ONE, WORK31, LDWORK,
346: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
347: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
348: END IF
349: END IF
350: *
351: IF( J3.GT.0 ) THEN
352: *
353: * Copy the lower triangle of A13 into the work array
354: * WORK13
355: *
356: DO 130 JJ = 1, J3
357: DO 120 II = JJ, JB
358: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
359: 120 CONTINUE
360: 130 CONTINUE
361: *
362: * Update A13 in the work array
363: *
364: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
365: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
366: $ WORK13, LDWORK )
367: *
368: IF( I2.GT.0 ) THEN
369: *
370: * Update A23
371: *
372: CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
373: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
374: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
375: $ LDAB-1 )
376: END IF
377: *
378: IF( I3.GT.0 ) THEN
379: *
380: * Update A33
381: *
382: CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
383: $ JB, -ONE, WORK31, LDWORK, WORK13,
384: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
385: END IF
386: *
387: * Copy the lower triangle of A13 back into place
388: *
389: DO 150 JJ = 1, J3
390: DO 140 II = JJ, JB
391: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
392: 140 CONTINUE
393: 150 CONTINUE
394: END IF
395: ELSE
396: *
397: * Adjust the pivot indices.
398: *
399: DO 160 I = J, J + JB - 1
400: IPIV( I ) = IPIV( I ) + J - 1
401: 160 CONTINUE
402: END IF
403: *
404: * Partially undo the interchanges in the current block to
405: * restore the upper triangular form of A31 and copy the upper
406: * triangle of A31 back into place
407: *
408: DO 170 JJ = J + JB - 1, J, -1
409: JP = IPIV( JJ ) - JJ + 1
410: IF( JP.NE.1 ) THEN
411: *
412: * Apply interchange to columns J to JJ-1
413: *
414: IF( JP+JJ-1.LT.J+KL ) THEN
415: *
416: * The interchange does not affect A31
417: *
418: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
419: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
420: ELSE
421: *
422: * The interchange does affect A31
423: *
424: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
425: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
426: END IF
427: END IF
428: *
429: * Copy the current column of A31 back into place
430: *
431: NW = MIN( I3, JJ-J+1 )
432: IF( NW.GT.0 )
433: $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
434: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
435: 170 CONTINUE
436: 180 CONTINUE
437: END IF
438: *
439: RETURN
440: *
441: * End of ZGBTRF
442: *
443: END
CVSweb interface <joel.bertrand@systella.fr>