Annotation of rpl/lapack/lapack/zgbtrf.f, revision 1.1
1.1 ! bertrand 1: SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! 2: *
! 3: * -- LAPACK routine (version 3.2) --
! 4: * -- LAPACK is a software package provided by Univ. of Tennessee, --
! 5: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
! 6: * November 2006
! 7: *
! 8: * .. Scalar Arguments ..
! 9: INTEGER INFO, KL, KU, LDAB, M, N
! 10: * ..
! 11: * .. Array Arguments ..
! 12: INTEGER IPIV( * )
! 13: COMPLEX*16 AB( LDAB, * )
! 14: * ..
! 15: *
! 16: * Purpose
! 17: * =======
! 18: *
! 19: * ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
! 20: * using partial pivoting with row interchanges.
! 21: *
! 22: * This is the blocked version of the algorithm, calling Level 3 BLAS.
! 23: *
! 24: * Arguments
! 25: * =========
! 26: *
! 27: * M (input) INTEGER
! 28: * The number of rows of the matrix A. M >= 0.
! 29: *
! 30: * N (input) INTEGER
! 31: * The number of columns of the matrix A. N >= 0.
! 32: *
! 33: * KL (input) INTEGER
! 34: * The number of subdiagonals within the band of A. KL >= 0.
! 35: *
! 36: * KU (input) INTEGER
! 37: * The number of superdiagonals within the band of A. KU >= 0.
! 38: *
! 39: * AB (input/output) COMPLEX*16 array, dimension (LDAB,N)
! 40: * On entry, the matrix A in band storage, in rows KL+1 to
! 41: * 2*KL+KU+1; rows 1 to KL of the array need not be set.
! 42: * The j-th column of A is stored in the j-th column of the
! 43: * array AB as follows:
! 44: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
! 45: *
! 46: * On exit, details of the factorization: U is stored as an
! 47: * upper triangular band matrix with KL+KU superdiagonals in
! 48: * rows 1 to KL+KU+1, and the multipliers used during the
! 49: * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
! 50: * See below for further details.
! 51: *
! 52: * LDAB (input) INTEGER
! 53: * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
! 54: *
! 55: * IPIV (output) INTEGER array, dimension (min(M,N))
! 56: * The pivot indices; for 1 <= i <= min(M,N), row i of the
! 57: * matrix was interchanged with row IPIV(i).
! 58: *
! 59: * INFO (output) INTEGER
! 60: * = 0: successful exit
! 61: * < 0: if INFO = -i, the i-th argument had an illegal value
! 62: * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
! 63: * has been completed, but the factor U is exactly
! 64: * singular, and division by zero will occur if it is used
! 65: * to solve a system of equations.
! 66: *
! 67: * Further Details
! 68: * ===============
! 69: *
! 70: * The band storage scheme is illustrated by the following example, when
! 71: * M = N = 6, KL = 2, KU = 1:
! 72: *
! 73: * On entry: On exit:
! 74: *
! 75: * * * * + + + * * * u14 u25 u36
! 76: * * * + + + + * * u13 u24 u35 u46
! 77: * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
! 78: * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
! 79: * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
! 80: * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
! 81: *
! 82: * Array elements marked * are not used by the routine; elements marked
! 83: * + need not be set on entry, but are required by the routine to store
! 84: * elements of U because of fill-in resulting from the row interchanges.
! 85: *
! 86: * =====================================================================
! 87: *
! 88: * .. Parameters ..
! 89: COMPLEX*16 ONE, ZERO
! 90: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
! 91: $ ZERO = ( 0.0D+0, 0.0D+0 ) )
! 92: INTEGER NBMAX, LDWORK
! 93: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
! 94: * ..
! 95: * .. Local Scalars ..
! 96: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
! 97: $ JU, K2, KM, KV, NB, NW
! 98: COMPLEX*16 TEMP
! 99: * ..
! 100: * .. Local Arrays ..
! 101: COMPLEX*16 WORK13( LDWORK, NBMAX ),
! 102: $ WORK31( LDWORK, NBMAX )
! 103: * ..
! 104: * .. External Functions ..
! 105: INTEGER ILAENV, IZAMAX
! 106: EXTERNAL ILAENV, IZAMAX
! 107: * ..
! 108: * .. External Subroutines ..
! 109: EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
! 110: $ ZSCAL, ZSWAP, ZTRSM
! 111: * ..
! 112: * .. Intrinsic Functions ..
! 113: INTRINSIC MAX, MIN
! 114: * ..
! 115: * .. Executable Statements ..
! 116: *
! 117: * KV is the number of superdiagonals in the factor U, allowing for
! 118: * fill-in
! 119: *
! 120: KV = KU + KL
! 121: *
! 122: * Test the input parameters.
! 123: *
! 124: INFO = 0
! 125: IF( M.LT.0 ) THEN
! 126: INFO = -1
! 127: ELSE IF( N.LT.0 ) THEN
! 128: INFO = -2
! 129: ELSE IF( KL.LT.0 ) THEN
! 130: INFO = -3
! 131: ELSE IF( KU.LT.0 ) THEN
! 132: INFO = -4
! 133: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
! 134: INFO = -6
! 135: END IF
! 136: IF( INFO.NE.0 ) THEN
! 137: CALL XERBLA( 'ZGBTRF', -INFO )
! 138: RETURN
! 139: END IF
! 140: *
! 141: * Quick return if possible
! 142: *
! 143: IF( M.EQ.0 .OR. N.EQ.0 )
! 144: $ RETURN
! 145: *
! 146: * Determine the block size for this environment
! 147: *
! 148: NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
! 149: *
! 150: * The block size must not exceed the limit set by the size of the
! 151: * local arrays WORK13 and WORK31.
! 152: *
! 153: NB = MIN( NB, NBMAX )
! 154: *
! 155: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
! 156: *
! 157: * Use unblocked code
! 158: *
! 159: CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! 160: ELSE
! 161: *
! 162: * Use blocked code
! 163: *
! 164: * Zero the superdiagonal elements of the work array WORK13
! 165: *
! 166: DO 20 J = 1, NB
! 167: DO 10 I = 1, J - 1
! 168: WORK13( I, J ) = ZERO
! 169: 10 CONTINUE
! 170: 20 CONTINUE
! 171: *
! 172: * Zero the subdiagonal elements of the work array WORK31
! 173: *
! 174: DO 40 J = 1, NB
! 175: DO 30 I = J + 1, NB
! 176: WORK31( I, J ) = ZERO
! 177: 30 CONTINUE
! 178: 40 CONTINUE
! 179: *
! 180: * Gaussian elimination with partial pivoting
! 181: *
! 182: * Set fill-in elements in columns KU+2 to KV to zero
! 183: *
! 184: DO 60 J = KU + 2, MIN( KV, N )
! 185: DO 50 I = KV - J + 2, KL
! 186: AB( I, J ) = ZERO
! 187: 50 CONTINUE
! 188: 60 CONTINUE
! 189: *
! 190: * JU is the index of the last column affected by the current
! 191: * stage of the factorization
! 192: *
! 193: JU = 1
! 194: *
! 195: DO 180 J = 1, MIN( M, N ), NB
! 196: JB = MIN( NB, MIN( M, N )-J+1 )
! 197: *
! 198: * The active part of the matrix is partitioned
! 199: *
! 200: * A11 A12 A13
! 201: * A21 A22 A23
! 202: * A31 A32 A33
! 203: *
! 204: * Here A11, A21 and A31 denote the current block of JB columns
! 205: * which is about to be factorized. The number of rows in the
! 206: * partitioning are JB, I2, I3 respectively, and the numbers
! 207: * of columns are JB, J2, J3. The superdiagonal elements of A13
! 208: * and the subdiagonal elements of A31 lie outside the band.
! 209: *
! 210: I2 = MIN( KL-JB, M-J-JB+1 )
! 211: I3 = MIN( JB, M-J-KL+1 )
! 212: *
! 213: * J2 and J3 are computed after JU has been updated.
! 214: *
! 215: * Factorize the current block of JB columns
! 216: *
! 217: DO 80 JJ = J, J + JB - 1
! 218: *
! 219: * Set fill-in elements in column JJ+KV to zero
! 220: *
! 221: IF( JJ+KV.LE.N ) THEN
! 222: DO 70 I = 1, KL
! 223: AB( I, JJ+KV ) = ZERO
! 224: 70 CONTINUE
! 225: END IF
! 226: *
! 227: * Find pivot and test for singularity. KM is the number of
! 228: * subdiagonal elements in the current column.
! 229: *
! 230: KM = MIN( KL, M-JJ )
! 231: JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
! 232: IPIV( JJ ) = JP + JJ - J
! 233: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
! 234: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
! 235: IF( JP.NE.1 ) THEN
! 236: *
! 237: * Apply interchange to columns J to J+JB-1
! 238: *
! 239: IF( JP+JJ-1.LT.J+KL ) THEN
! 240: *
! 241: CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
! 242: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
! 243: ELSE
! 244: *
! 245: * The interchange affects columns J to JJ-1 of A31
! 246: * which are stored in the work array WORK31
! 247: *
! 248: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 249: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
! 250: CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
! 251: $ AB( KV+JP, JJ ), LDAB-1 )
! 252: END IF
! 253: END IF
! 254: *
! 255: * Compute multipliers
! 256: *
! 257: CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
! 258: $ 1 )
! 259: *
! 260: * Update trailing submatrix within the band and within
! 261: * the current block. JM is the index of the last column
! 262: * which needs to be updated.
! 263: *
! 264: JM = MIN( JU, J+JB-1 )
! 265: IF( JM.GT.JJ )
! 266: $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
! 267: $ AB( KV, JJ+1 ), LDAB-1,
! 268: $ AB( KV+1, JJ+1 ), LDAB-1 )
! 269: ELSE
! 270: *
! 271: * If pivot is zero, set INFO to the index of the pivot
! 272: * unless a zero pivot has already been found.
! 273: *
! 274: IF( INFO.EQ.0 )
! 275: $ INFO = JJ
! 276: END IF
! 277: *
! 278: * Copy current column of A31 into the work array WORK31
! 279: *
! 280: NW = MIN( JJ-J+1, I3 )
! 281: IF( NW.GT.0 )
! 282: $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
! 283: $ WORK31( 1, JJ-J+1 ), 1 )
! 284: 80 CONTINUE
! 285: IF( J+JB.LE.N ) THEN
! 286: *
! 287: * Apply the row interchanges to the other blocks.
! 288: *
! 289: J2 = MIN( JU-J+1, KV ) - JB
! 290: J3 = MAX( 0, JU-J-KV+1 )
! 291: *
! 292: * Use ZLASWP to apply the row interchanges to A12, A22, and
! 293: * A32.
! 294: *
! 295: CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
! 296: $ IPIV( J ), 1 )
! 297: *
! 298: * Adjust the pivot indices.
! 299: *
! 300: DO 90 I = J, J + JB - 1
! 301: IPIV( I ) = IPIV( I ) + J - 1
! 302: 90 CONTINUE
! 303: *
! 304: * Apply the row interchanges to A13, A23, and A33
! 305: * columnwise.
! 306: *
! 307: K2 = J - 1 + JB + J2
! 308: DO 110 I = 1, J3
! 309: JJ = K2 + I
! 310: DO 100 II = J + I - 1, J + JB - 1
! 311: IP = IPIV( II )
! 312: IF( IP.NE.II ) THEN
! 313: TEMP = AB( KV+1+II-JJ, JJ )
! 314: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
! 315: AB( KV+1+IP-JJ, JJ ) = TEMP
! 316: END IF
! 317: 100 CONTINUE
! 318: 110 CONTINUE
! 319: *
! 320: * Update the relevant part of the trailing submatrix
! 321: *
! 322: IF( J2.GT.0 ) THEN
! 323: *
! 324: * Update A12
! 325: *
! 326: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
! 327: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
! 328: $ AB( KV+1-JB, J+JB ), LDAB-1 )
! 329: *
! 330: IF( I2.GT.0 ) THEN
! 331: *
! 332: * Update A22
! 333: *
! 334: CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
! 335: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
! 336: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
! 337: $ AB( KV+1, J+JB ), LDAB-1 )
! 338: END IF
! 339: *
! 340: IF( I3.GT.0 ) THEN
! 341: *
! 342: * Update A32
! 343: *
! 344: CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
! 345: $ JB, -ONE, WORK31, LDWORK,
! 346: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
! 347: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
! 348: END IF
! 349: END IF
! 350: *
! 351: IF( J3.GT.0 ) THEN
! 352: *
! 353: * Copy the lower triangle of A13 into the work array
! 354: * WORK13
! 355: *
! 356: DO 130 JJ = 1, J3
! 357: DO 120 II = JJ, JB
! 358: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
! 359: 120 CONTINUE
! 360: 130 CONTINUE
! 361: *
! 362: * Update A13 in the work array
! 363: *
! 364: CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
! 365: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
! 366: $ WORK13, LDWORK )
! 367: *
! 368: IF( I2.GT.0 ) THEN
! 369: *
! 370: * Update A23
! 371: *
! 372: CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
! 373: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
! 374: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
! 375: $ LDAB-1 )
! 376: END IF
! 377: *
! 378: IF( I3.GT.0 ) THEN
! 379: *
! 380: * Update A33
! 381: *
! 382: CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
! 383: $ JB, -ONE, WORK31, LDWORK, WORK13,
! 384: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
! 385: END IF
! 386: *
! 387: * Copy the lower triangle of A13 back into place
! 388: *
! 389: DO 150 JJ = 1, J3
! 390: DO 140 II = JJ, JB
! 391: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
! 392: 140 CONTINUE
! 393: 150 CONTINUE
! 394: END IF
! 395: ELSE
! 396: *
! 397: * Adjust the pivot indices.
! 398: *
! 399: DO 160 I = J, J + JB - 1
! 400: IPIV( I ) = IPIV( I ) + J - 1
! 401: 160 CONTINUE
! 402: END IF
! 403: *
! 404: * Partially undo the interchanges in the current block to
! 405: * restore the upper triangular form of A31 and copy the upper
! 406: * triangle of A31 back into place
! 407: *
! 408: DO 170 JJ = J + JB - 1, J, -1
! 409: JP = IPIV( JJ ) - JJ + 1
! 410: IF( JP.NE.1 ) THEN
! 411: *
! 412: * Apply interchange to columns J to JJ-1
! 413: *
! 414: IF( JP+JJ-1.LT.J+KL ) THEN
! 415: *
! 416: * The interchange does not affect A31
! 417: *
! 418: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 419: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
! 420: ELSE
! 421: *
! 422: * The interchange does affect A31
! 423: *
! 424: CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 425: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
! 426: END IF
! 427: END IF
! 428: *
! 429: * Copy the current column of A31 back into place
! 430: *
! 431: NW = MIN( I3, JJ-J+1 )
! 432: IF( NW.GT.0 )
! 433: $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
! 434: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
! 435: 170 CONTINUE
! 436: 180 CONTINUE
! 437: END IF
! 438: *
! 439: RETURN
! 440: *
! 441: * End of ZGBTRF
! 442: *
! 443: END
CVSweb interface <joel.bertrand@systella.fr>