Annotation of rpl/lapack/lapack/zgbtrf.f, revision 1.8
1.8 ! bertrand 1: *> \brief \b ZGBTRF
! 2: *
! 3: * =========== DOCUMENTATION ===========
! 4: *
! 5: * Online html documentation available at
! 6: * http://www.netlib.org/lapack/explore-html/
! 7: *
! 8: *> \htmlonly
! 9: *> Download ZGBTRF + dependencies
! 10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgbtrf.f">
! 11: *> [TGZ]</a>
! 12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgbtrf.f">
! 13: *> [ZIP]</a>
! 14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgbtrf.f">
! 15: *> [TXT]</a>
! 16: *> \endhtmlonly
! 17: *
! 18: * Definition:
! 19: * ===========
! 20: *
! 21: * SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! 22: *
! 23: * .. Scalar Arguments ..
! 24: * INTEGER INFO, KL, KU, LDAB, M, N
! 25: * ..
! 26: * .. Array Arguments ..
! 27: * INTEGER IPIV( * )
! 28: * COMPLEX*16 AB( LDAB, * )
! 29: * ..
! 30: *
! 31: *
! 32: *> \par Purpose:
! 33: * =============
! 34: *>
! 35: *> \verbatim
! 36: *>
! 37: *> ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
! 38: *> using partial pivoting with row interchanges.
! 39: *>
! 40: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
! 41: *> \endverbatim
! 42: *
! 43: * Arguments:
! 44: * ==========
! 45: *
! 46: *> \param[in] M
! 47: *> \verbatim
! 48: *> M is INTEGER
! 49: *> The number of rows of the matrix A. M >= 0.
! 50: *> \endverbatim
! 51: *>
! 52: *> \param[in] N
! 53: *> \verbatim
! 54: *> N is INTEGER
! 55: *> The number of columns of the matrix A. N >= 0.
! 56: *> \endverbatim
! 57: *>
! 58: *> \param[in] KL
! 59: *> \verbatim
! 60: *> KL is INTEGER
! 61: *> The number of subdiagonals within the band of A. KL >= 0.
! 62: *> \endverbatim
! 63: *>
! 64: *> \param[in] KU
! 65: *> \verbatim
! 66: *> KU is INTEGER
! 67: *> The number of superdiagonals within the band of A. KU >= 0.
! 68: *> \endverbatim
! 69: *>
! 70: *> \param[in,out] AB
! 71: *> \verbatim
! 72: *> AB is COMPLEX*16 array, dimension (LDAB,N)
! 73: *> On entry, the matrix A in band storage, in rows KL+1 to
! 74: *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
! 75: *> The j-th column of A is stored in the j-th column of the
! 76: *> array AB as follows:
! 77: *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
! 78: *>
! 79: *> On exit, details of the factorization: U is stored as an
! 80: *> upper triangular band matrix with KL+KU superdiagonals in
! 81: *> rows 1 to KL+KU+1, and the multipliers used during the
! 82: *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
! 83: *> See below for further details.
! 84: *> \endverbatim
! 85: *>
! 86: *> \param[in] LDAB
! 87: *> \verbatim
! 88: *> LDAB is INTEGER
! 89: *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
! 90: *> \endverbatim
! 91: *>
! 92: *> \param[out] IPIV
! 93: *> \verbatim
! 94: *> IPIV is INTEGER array, dimension (min(M,N))
! 95: *> The pivot indices; for 1 <= i <= min(M,N), row i of the
! 96: *> matrix was interchanged with row IPIV(i).
! 97: *> \endverbatim
! 98: *>
! 99: *> \param[out] INFO
! 100: *> \verbatim
! 101: *> INFO is INTEGER
! 102: *> = 0: successful exit
! 103: *> < 0: if INFO = -i, the i-th argument had an illegal value
! 104: *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
! 105: *> has been completed, but the factor U is exactly
! 106: *> singular, and division by zero will occur if it is used
! 107: *> to solve a system of equations.
! 108: *> \endverbatim
! 109: *
! 110: * Authors:
! 111: * ========
! 112: *
! 113: *> \author Univ. of Tennessee
! 114: *> \author Univ. of California Berkeley
! 115: *> \author Univ. of Colorado Denver
! 116: *> \author NAG Ltd.
! 117: *
! 118: *> \date November 2011
! 119: *
! 120: *> \ingroup complex16GBcomputational
! 121: *
! 122: *> \par Further Details:
! 123: * =====================
! 124: *>
! 125: *> \verbatim
! 126: *>
! 127: *> The band storage scheme is illustrated by the following example, when
! 128: *> M = N = 6, KL = 2, KU = 1:
! 129: *>
! 130: *> On entry: On exit:
! 131: *>
! 132: *> * * * + + + * * * u14 u25 u36
! 133: *> * * + + + + * * u13 u24 u35 u46
! 134: *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
! 135: *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
! 136: *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
! 137: *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
! 138: *>
! 139: *> Array elements marked * are not used by the routine; elements marked
! 140: *> + need not be set on entry, but are required by the routine to store
! 141: *> elements of U because of fill-in resulting from the row interchanges.
! 142: *> \endverbatim
! 143: *>
! 144: * =====================================================================
1.1 bertrand 145: SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
146: *
1.8 ! bertrand 147: * -- LAPACK computational routine (version 3.4.0) --
1.1 bertrand 148: * -- LAPACK is a software package provided by Univ. of Tennessee, --
149: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.8 ! bertrand 150: * November 2011
1.1 bertrand 151: *
152: * .. Scalar Arguments ..
153: INTEGER INFO, KL, KU, LDAB, M, N
154: * ..
155: * .. Array Arguments ..
156: INTEGER IPIV( * )
157: COMPLEX*16 AB( LDAB, * )
158: * ..
159: *
160: * =====================================================================
161: *
162: * .. Parameters ..
163: COMPLEX*16 ONE, ZERO
164: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
165: $ ZERO = ( 0.0D+0, 0.0D+0 ) )
166: INTEGER NBMAX, LDWORK
167: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
168: * ..
169: * .. Local Scalars ..
170: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
171: $ JU, K2, KM, KV, NB, NW
172: COMPLEX*16 TEMP
173: * ..
174: * .. Local Arrays ..
175: COMPLEX*16 WORK13( LDWORK, NBMAX ),
176: $ WORK31( LDWORK, NBMAX )
177: * ..
178: * .. External Functions ..
179: INTEGER ILAENV, IZAMAX
180: EXTERNAL ILAENV, IZAMAX
181: * ..
182: * .. External Subroutines ..
183: EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
184: $ ZSCAL, ZSWAP, ZTRSM
185: * ..
186: * .. Intrinsic Functions ..
187: INTRINSIC MAX, MIN
188: * ..
189: * .. Executable Statements ..
190: *
191: * KV is the number of superdiagonals in the factor U, allowing for
192: * fill-in
193: *
194: KV = KU + KL
195: *
196: * Test the input parameters.
197: *
198: INFO = 0
199: IF( M.LT.0 ) THEN
200: INFO = -1
201: ELSE IF( N.LT.0 ) THEN
202: INFO = -2
203: ELSE IF( KL.LT.0 ) THEN
204: INFO = -3
205: ELSE IF( KU.LT.0 ) THEN
206: INFO = -4
207: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
208: INFO = -6
209: END IF
210: IF( INFO.NE.0 ) THEN
211: CALL XERBLA( 'ZGBTRF', -INFO )
212: RETURN
213: END IF
214: *
215: * Quick return if possible
216: *
217: IF( M.EQ.0 .OR. N.EQ.0 )
218: $ RETURN
219: *
220: * Determine the block size for this environment
221: *
222: NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
223: *
224: * The block size must not exceed the limit set by the size of the
225: * local arrays WORK13 and WORK31.
226: *
227: NB = MIN( NB, NBMAX )
228: *
229: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
230: *
231: * Use unblocked code
232: *
233: CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
234: ELSE
235: *
236: * Use blocked code
237: *
238: * Zero the superdiagonal elements of the work array WORK13
239: *
240: DO 20 J = 1, NB
241: DO 10 I = 1, J - 1
242: WORK13( I, J ) = ZERO
243: 10 CONTINUE
244: 20 CONTINUE
245: *
246: * Zero the subdiagonal elements of the work array WORK31
247: *
248: DO 40 J = 1, NB
249: DO 30 I = J + 1, NB
250: WORK31( I, J ) = ZERO
251: 30 CONTINUE
252: 40 CONTINUE
253: *
254: * Gaussian elimination with partial pivoting
255: *
256: * Set fill-in elements in columns KU+2 to KV to zero
257: *
258: DO 60 J = KU + 2, MIN( KV, N )
259: DO 50 I = KV - J + 2, KL
260: AB( I, J ) = ZERO
261: 50 CONTINUE
262: 60 CONTINUE
263: *
264: * JU is the index of the last column affected by the current
265: * stage of the factorization
266: *
267: JU = 1
268: *
269: DO 180 J = 1, MIN( M, N ), NB
270: JB = MIN( NB, MIN( M, N )-J+1 )
271: *
272: * The active part of the matrix is partitioned
273: *
274: * A11 A12 A13
275: * A21 A22 A23
276: * A31 A32 A33
277: *
278: * Here A11, A21 and A31 denote the current block of JB columns
279: * which is about to be factorized. The number of rows in the
280: * partitioning are JB, I2, I3 respectively, and the numbers
281: * of columns are JB, J2, J3. The superdiagonal elements of A13
282: * and the subdiagonal elements of A31 lie outside the band.
283: *
284: I2 = MIN( KL-JB, M-J-JB+1 )
285: I3 = MIN( JB, M-J-KL+1 )
286: *
287: * J2 and J3 are computed after JU has been updated.
288: *
289: * Factorize the current block of JB columns
290: *
291: DO 80 JJ = J, J + JB - 1
292: *
293: * Set fill-in elements in column JJ+KV to zero
294: *
295: IF( JJ+KV.LE.N ) THEN
296: DO 70 I = 1, KL
297: AB( I, JJ+KV ) = ZERO
298: 70 CONTINUE
299: END IF
300: *
301: * Find pivot and test for singularity. KM is the number of
302: * subdiagonal elements in the current column.
303: *
304: KM = MIN( KL, M-JJ )
305: JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
306: IPIV( JJ ) = JP + JJ - J
307: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
308: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
309: IF( JP.NE.1 ) THEN
310: *
311: * Apply interchange to columns J to J+JB-1
312: *
313: IF( JP+JJ-1.LT.J+KL ) THEN
314: *
315: CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
316: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
317: ELSE
318: *
319: * The interchange affects columns J to JJ-1 of A31
320: * which are stored in the work array WORK31
321: *
322: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
323: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
324: CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
325: $ AB( KV+JP, JJ ), LDAB-1 )
326: END IF
327: END IF
328: *
329: * Compute multipliers
330: *
331: CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
332: $ 1 )
333: *
334: * Update trailing submatrix within the band and within
335: * the current block. JM is the index of the last column
336: * which needs to be updated.
337: *
338: JM = MIN( JU, J+JB-1 )
339: IF( JM.GT.JJ )
340: $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
341: $ AB( KV, JJ+1 ), LDAB-1,
342: $ AB( KV+1, JJ+1 ), LDAB-1 )
343: ELSE
344: *
345: * If pivot is zero, set INFO to the index of the pivot
346: * unless a zero pivot has already been found.
347: *
348: IF( INFO.EQ.0 )
349: $ INFO = JJ
350: END IF
351: *
352: * Copy current column of A31 into the work array WORK31
353: *
354: NW = MIN( JJ-J+1, I3 )
355: IF( NW.GT.0 )
356: $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
357: $ WORK31( 1, JJ-J+1 ), 1 )
358: 80 CONTINUE
359: IF( J+JB.LE.N ) THEN
360: *
361: * Apply the row interchanges to the other blocks.
362: *
363: J2 = MIN( JU-J+1, KV ) - JB
364: J3 = MAX( 0, JU-J-KV+1 )
365: *
366: * Use ZLASWP to apply the row interchanges to A12, A22, and
367: * A32.
368: *
369: CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
370: $ IPIV( J ), 1 )
371: *
372: * Adjust the pivot indices.
373: *
374: DO 90 I = J, J + JB - 1
375: IPIV( I ) = IPIV( I ) + J - 1
376: 90 CONTINUE
377: *
378: * Apply the row interchanges to A13, A23, and A33
379: * columnwise.
380: *
381: K2 = J - 1 + JB + J2
382: DO 110 I = 1, J3
383: JJ = K2 + I
384: DO 100 II = J + I - 1, J + JB - 1
385: IP = IPIV( II )
386: IF( IP.NE.II ) THEN
387: TEMP = AB( KV+1+II-JJ, JJ )
388: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
389: AB( KV+1+IP-JJ, JJ ) = TEMP
390: END IF
391: 100 CONTINUE
392: 110 CONTINUE
393: *
394: * Update the relevant part of the trailing submatrix
395: *
396: IF( J2.GT.0 ) THEN
397: *
398: * Update A12
399: *
400: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
401: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
402: $ AB( KV+1-JB, J+JB ), LDAB-1 )
403: *
404: IF( I2.GT.0 ) THEN
405: *
406: * Update A22
407: *
408: CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
409: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
410: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
411: $ AB( KV+1, J+JB ), LDAB-1 )
412: END IF
413: *
414: IF( I3.GT.0 ) THEN
415: *
416: * Update A32
417: *
418: CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
419: $ JB, -ONE, WORK31, LDWORK,
420: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
421: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
422: END IF
423: END IF
424: *
425: IF( J3.GT.0 ) THEN
426: *
427: * Copy the lower triangle of A13 into the work array
428: * WORK13
429: *
430: DO 130 JJ = 1, J3
431: DO 120 II = JJ, JB
432: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
433: 120 CONTINUE
434: 130 CONTINUE
435: *
436: * Update A13 in the work array
437: *
438: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
439: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
440: $ WORK13, LDWORK )
441: *
442: IF( I2.GT.0 ) THEN
443: *
444: * Update A23
445: *
446: CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
447: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
448: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
449: $ LDAB-1 )
450: END IF
451: *
452: IF( I3.GT.0 ) THEN
453: *
454: * Update A33
455: *
456: CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
457: $ JB, -ONE, WORK31, LDWORK, WORK13,
458: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
459: END IF
460: *
461: * Copy the lower triangle of A13 back into place
462: *
463: DO 150 JJ = 1, J3
464: DO 140 II = JJ, JB
465: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
466: 140 CONTINUE
467: 150 CONTINUE
468: END IF
469: ELSE
470: *
471: * Adjust the pivot indices.
472: *
473: DO 160 I = J, J + JB - 1
474: IPIV( I ) = IPIV( I ) + J - 1
475: 160 CONTINUE
476: END IF
477: *
478: * Partially undo the interchanges in the current block to
479: * restore the upper triangular form of A31 and copy the upper
480: * triangle of A31 back into place
481: *
482: DO 170 JJ = J + JB - 1, J, -1
483: JP = IPIV( JJ ) - JJ + 1
484: IF( JP.NE.1 ) THEN
485: *
486: * Apply interchange to columns J to JJ-1
487: *
488: IF( JP+JJ-1.LT.J+KL ) THEN
489: *
490: * The interchange does not affect A31
491: *
492: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
493: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
494: ELSE
495: *
496: * The interchange does affect A31
497: *
498: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
499: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
500: END IF
501: END IF
502: *
503: * Copy the current column of A31 back into place
504: *
505: NW = MIN( I3, JJ-J+1 )
506: IF( NW.GT.0 )
507: $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
508: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
509: 170 CONTINUE
510: 180 CONTINUE
511: END IF
512: *
513: RETURN
514: *
515: * End of ZGBTRF
516: *
517: END
CVSweb interface <joel.bertrand@systella.fr>