1: *> \brief \b ZGBTRF
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZGBTRF + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgbtrf.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgbtrf.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgbtrf.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
22: *
23: * .. Scalar Arguments ..
24: * INTEGER INFO, KL, KU, LDAB, M, N
25: * ..
26: * .. Array Arguments ..
27: * INTEGER IPIV( * )
28: * COMPLEX*16 AB( LDAB, * )
29: * ..
30: *
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
38: *> using partial pivoting with row interchanges.
39: *>
40: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
41: *> \endverbatim
42: *
43: * Arguments:
44: * ==========
45: *
46: *> \param[in] M
47: *> \verbatim
48: *> M is INTEGER
49: *> The number of rows of the matrix A. M >= 0.
50: *> \endverbatim
51: *>
52: *> \param[in] N
53: *> \verbatim
54: *> N is INTEGER
55: *> The number of columns of the matrix A. N >= 0.
56: *> \endverbatim
57: *>
58: *> \param[in] KL
59: *> \verbatim
60: *> KL is INTEGER
61: *> The number of subdiagonals within the band of A. KL >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in] KU
65: *> \verbatim
66: *> KU is INTEGER
67: *> The number of superdiagonals within the band of A. KU >= 0.
68: *> \endverbatim
69: *>
70: *> \param[in,out] AB
71: *> \verbatim
72: *> AB is COMPLEX*16 array, dimension (LDAB,N)
73: *> On entry, the matrix A in band storage, in rows KL+1 to
74: *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
75: *> The j-th column of A is stored in the j-th column of the
76: *> array AB as follows:
77: *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
78: *>
79: *> On exit, details of the factorization: U is stored as an
80: *> upper triangular band matrix with KL+KU superdiagonals in
81: *> rows 1 to KL+KU+1, and the multipliers used during the
82: *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
83: *> See below for further details.
84: *> \endverbatim
85: *>
86: *> \param[in] LDAB
87: *> \verbatim
88: *> LDAB is INTEGER
89: *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
90: *> \endverbatim
91: *>
92: *> \param[out] IPIV
93: *> \verbatim
94: *> IPIV is INTEGER array, dimension (min(M,N))
95: *> The pivot indices; for 1 <= i <= min(M,N), row i of the
96: *> matrix was interchanged with row IPIV(i).
97: *> \endverbatim
98: *>
99: *> \param[out] INFO
100: *> \verbatim
101: *> INFO is INTEGER
102: *> = 0: successful exit
103: *> < 0: if INFO = -i, the i-th argument had an illegal value
104: *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
105: *> has been completed, but the factor U is exactly
106: *> singular, and division by zero will occur if it is used
107: *> to solve a system of equations.
108: *> \endverbatim
109: *
110: * Authors:
111: * ========
112: *
113: *> \author Univ. of Tennessee
114: *> \author Univ. of California Berkeley
115: *> \author Univ. of Colorado Denver
116: *> \author NAG Ltd.
117: *
118: *> \ingroup complex16GBcomputational
119: *
120: *> \par Further Details:
121: * =====================
122: *>
123: *> \verbatim
124: *>
125: *> The band storage scheme is illustrated by the following example, when
126: *> M = N = 6, KL = 2, KU = 1:
127: *>
128: *> On entry: On exit:
129: *>
130: *> * * * + + + * * * u14 u25 u36
131: *> * * + + + + * * u13 u24 u35 u46
132: *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
133: *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
134: *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
135: *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
136: *>
137: *> Array elements marked * are not used by the routine; elements marked
138: *> + need not be set on entry, but are required by the routine to store
139: *> elements of U because of fill-in resulting from the row interchanges.
140: *> \endverbatim
141: *>
142: * =====================================================================
143: SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
144: *
145: * -- LAPACK computational routine --
146: * -- LAPACK is a software package provided by Univ. of Tennessee, --
147: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
148: *
149: * .. Scalar Arguments ..
150: INTEGER INFO, KL, KU, LDAB, M, N
151: * ..
152: * .. Array Arguments ..
153: INTEGER IPIV( * )
154: COMPLEX*16 AB( LDAB, * )
155: * ..
156: *
157: * =====================================================================
158: *
159: * .. Parameters ..
160: COMPLEX*16 ONE, ZERO
161: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
162: $ ZERO = ( 0.0D+0, 0.0D+0 ) )
163: INTEGER NBMAX, LDWORK
164: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
165: * ..
166: * .. Local Scalars ..
167: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
168: $ JU, K2, KM, KV, NB, NW
169: COMPLEX*16 TEMP
170: * ..
171: * .. Local Arrays ..
172: COMPLEX*16 WORK13( LDWORK, NBMAX ),
173: $ WORK31( LDWORK, NBMAX )
174: * ..
175: * .. External Functions ..
176: INTEGER ILAENV, IZAMAX
177: EXTERNAL ILAENV, IZAMAX
178: * ..
179: * .. External Subroutines ..
180: EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
181: $ ZSCAL, ZSWAP, ZTRSM
182: * ..
183: * .. Intrinsic Functions ..
184: INTRINSIC MAX, MIN
185: * ..
186: * .. Executable Statements ..
187: *
188: * KV is the number of superdiagonals in the factor U, allowing for
189: * fill-in
190: *
191: KV = KU + KL
192: *
193: * Test the input parameters.
194: *
195: INFO = 0
196: IF( M.LT.0 ) THEN
197: INFO = -1
198: ELSE IF( N.LT.0 ) THEN
199: INFO = -2
200: ELSE IF( KL.LT.0 ) THEN
201: INFO = -3
202: ELSE IF( KU.LT.0 ) THEN
203: INFO = -4
204: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
205: INFO = -6
206: END IF
207: IF( INFO.NE.0 ) THEN
208: CALL XERBLA( 'ZGBTRF', -INFO )
209: RETURN
210: END IF
211: *
212: * Quick return if possible
213: *
214: IF( M.EQ.0 .OR. N.EQ.0 )
215: $ RETURN
216: *
217: * Determine the block size for this environment
218: *
219: NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
220: *
221: * The block size must not exceed the limit set by the size of the
222: * local arrays WORK13 and WORK31.
223: *
224: NB = MIN( NB, NBMAX )
225: *
226: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
227: *
228: * Use unblocked code
229: *
230: CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
231: ELSE
232: *
233: * Use blocked code
234: *
235: * Zero the superdiagonal elements of the work array WORK13
236: *
237: DO 20 J = 1, NB
238: DO 10 I = 1, J - 1
239: WORK13( I, J ) = ZERO
240: 10 CONTINUE
241: 20 CONTINUE
242: *
243: * Zero the subdiagonal elements of the work array WORK31
244: *
245: DO 40 J = 1, NB
246: DO 30 I = J + 1, NB
247: WORK31( I, J ) = ZERO
248: 30 CONTINUE
249: 40 CONTINUE
250: *
251: * Gaussian elimination with partial pivoting
252: *
253: * Set fill-in elements in columns KU+2 to KV to zero
254: *
255: DO 60 J = KU + 2, MIN( KV, N )
256: DO 50 I = KV - J + 2, KL
257: AB( I, J ) = ZERO
258: 50 CONTINUE
259: 60 CONTINUE
260: *
261: * JU is the index of the last column affected by the current
262: * stage of the factorization
263: *
264: JU = 1
265: *
266: DO 180 J = 1, MIN( M, N ), NB
267: JB = MIN( NB, MIN( M, N )-J+1 )
268: *
269: * The active part of the matrix is partitioned
270: *
271: * A11 A12 A13
272: * A21 A22 A23
273: * A31 A32 A33
274: *
275: * Here A11, A21 and A31 denote the current block of JB columns
276: * which is about to be factorized. The number of rows in the
277: * partitioning are JB, I2, I3 respectively, and the numbers
278: * of columns are JB, J2, J3. The superdiagonal elements of A13
279: * and the subdiagonal elements of A31 lie outside the band.
280: *
281: I2 = MIN( KL-JB, M-J-JB+1 )
282: I3 = MIN( JB, M-J-KL+1 )
283: *
284: * J2 and J3 are computed after JU has been updated.
285: *
286: * Factorize the current block of JB columns
287: *
288: DO 80 JJ = J, J + JB - 1
289: *
290: * Set fill-in elements in column JJ+KV to zero
291: *
292: IF( JJ+KV.LE.N ) THEN
293: DO 70 I = 1, KL
294: AB( I, JJ+KV ) = ZERO
295: 70 CONTINUE
296: END IF
297: *
298: * Find pivot and test for singularity. KM is the number of
299: * subdiagonal elements in the current column.
300: *
301: KM = MIN( KL, M-JJ )
302: JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
303: IPIV( JJ ) = JP + JJ - J
304: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
305: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
306: IF( JP.NE.1 ) THEN
307: *
308: * Apply interchange to columns J to J+JB-1
309: *
310: IF( JP+JJ-1.LT.J+KL ) THEN
311: *
312: CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
313: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
314: ELSE
315: *
316: * The interchange affects columns J to JJ-1 of A31
317: * which are stored in the work array WORK31
318: *
319: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
320: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
321: CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
322: $ AB( KV+JP, JJ ), LDAB-1 )
323: END IF
324: END IF
325: *
326: * Compute multipliers
327: *
328: CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
329: $ 1 )
330: *
331: * Update trailing submatrix within the band and within
332: * the current block. JM is the index of the last column
333: * which needs to be updated.
334: *
335: JM = MIN( JU, J+JB-1 )
336: IF( JM.GT.JJ )
337: $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
338: $ AB( KV, JJ+1 ), LDAB-1,
339: $ AB( KV+1, JJ+1 ), LDAB-1 )
340: ELSE
341: *
342: * If pivot is zero, set INFO to the index of the pivot
343: * unless a zero pivot has already been found.
344: *
345: IF( INFO.EQ.0 )
346: $ INFO = JJ
347: END IF
348: *
349: * Copy current column of A31 into the work array WORK31
350: *
351: NW = MIN( JJ-J+1, I3 )
352: IF( NW.GT.0 )
353: $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
354: $ WORK31( 1, JJ-J+1 ), 1 )
355: 80 CONTINUE
356: IF( J+JB.LE.N ) THEN
357: *
358: * Apply the row interchanges to the other blocks.
359: *
360: J2 = MIN( JU-J+1, KV ) - JB
361: J3 = MAX( 0, JU-J-KV+1 )
362: *
363: * Use ZLASWP to apply the row interchanges to A12, A22, and
364: * A32.
365: *
366: CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
367: $ IPIV( J ), 1 )
368: *
369: * Adjust the pivot indices.
370: *
371: DO 90 I = J, J + JB - 1
372: IPIV( I ) = IPIV( I ) + J - 1
373: 90 CONTINUE
374: *
375: * Apply the row interchanges to A13, A23, and A33
376: * columnwise.
377: *
378: K2 = J - 1 + JB + J2
379: DO 110 I = 1, J3
380: JJ = K2 + I
381: DO 100 II = J + I - 1, J + JB - 1
382: IP = IPIV( II )
383: IF( IP.NE.II ) THEN
384: TEMP = AB( KV+1+II-JJ, JJ )
385: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
386: AB( KV+1+IP-JJ, JJ ) = TEMP
387: END IF
388: 100 CONTINUE
389: 110 CONTINUE
390: *
391: * Update the relevant part of the trailing submatrix
392: *
393: IF( J2.GT.0 ) THEN
394: *
395: * Update A12
396: *
397: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
398: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
399: $ AB( KV+1-JB, J+JB ), LDAB-1 )
400: *
401: IF( I2.GT.0 ) THEN
402: *
403: * Update A22
404: *
405: CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
406: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
407: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
408: $ AB( KV+1, J+JB ), LDAB-1 )
409: END IF
410: *
411: IF( I3.GT.0 ) THEN
412: *
413: * Update A32
414: *
415: CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
416: $ JB, -ONE, WORK31, LDWORK,
417: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
418: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
419: END IF
420: END IF
421: *
422: IF( J3.GT.0 ) THEN
423: *
424: * Copy the lower triangle of A13 into the work array
425: * WORK13
426: *
427: DO 130 JJ = 1, J3
428: DO 120 II = JJ, JB
429: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
430: 120 CONTINUE
431: 130 CONTINUE
432: *
433: * Update A13 in the work array
434: *
435: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
436: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
437: $ WORK13, LDWORK )
438: *
439: IF( I2.GT.0 ) THEN
440: *
441: * Update A23
442: *
443: CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
444: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
445: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
446: $ LDAB-1 )
447: END IF
448: *
449: IF( I3.GT.0 ) THEN
450: *
451: * Update A33
452: *
453: CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
454: $ JB, -ONE, WORK31, LDWORK, WORK13,
455: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
456: END IF
457: *
458: * Copy the lower triangle of A13 back into place
459: *
460: DO 150 JJ = 1, J3
461: DO 140 II = JJ, JB
462: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
463: 140 CONTINUE
464: 150 CONTINUE
465: END IF
466: ELSE
467: *
468: * Adjust the pivot indices.
469: *
470: DO 160 I = J, J + JB - 1
471: IPIV( I ) = IPIV( I ) + J - 1
472: 160 CONTINUE
473: END IF
474: *
475: * Partially undo the interchanges in the current block to
476: * restore the upper triangular form of A31 and copy the upper
477: * triangle of A31 back into place
478: *
479: DO 170 JJ = J + JB - 1, J, -1
480: JP = IPIV( JJ ) - JJ + 1
481: IF( JP.NE.1 ) THEN
482: *
483: * Apply interchange to columns J to JJ-1
484: *
485: IF( JP+JJ-1.LT.J+KL ) THEN
486: *
487: * The interchange does not affect A31
488: *
489: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
490: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
491: ELSE
492: *
493: * The interchange does affect A31
494: *
495: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
496: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
497: END IF
498: END IF
499: *
500: * Copy the current column of A31 back into place
501: *
502: NW = MIN( I3, JJ-J+1 )
503: IF( NW.GT.0 )
504: $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
505: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
506: 170 CONTINUE
507: 180 CONTINUE
508: END IF
509: *
510: RETURN
511: *
512: * End of ZGBTRF
513: *
514: END
CVSweb interface <joel.bertrand@systella.fr>