Annotation of rpl/lapack/lapack/dgbtrf.f, revision 1.1
1.1 ! bertrand 1: SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! 2: *
! 3: * -- LAPACK routine (version 3.2) --
! 4: * -- LAPACK is a software package provided by Univ. of Tennessee, --
! 5: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
! 6: * November 2006
! 7: *
! 8: * .. Scalar Arguments ..
! 9: INTEGER INFO, KL, KU, LDAB, M, N
! 10: * ..
! 11: * .. Array Arguments ..
! 12: INTEGER IPIV( * )
! 13: DOUBLE PRECISION AB( LDAB, * )
! 14: * ..
! 15: *
! 16: * Purpose
! 17: * =======
! 18: *
! 19: * DGBTRF computes an LU factorization of a real m-by-n band matrix A
! 20: * using partial pivoting with row interchanges.
! 21: *
! 22: * This is the blocked version of the algorithm, calling Level 3 BLAS.
! 23: *
! 24: * Arguments
! 25: * =========
! 26: *
! 27: * M (input) INTEGER
! 28: * The number of rows of the matrix A. M >= 0.
! 29: *
! 30: * N (input) INTEGER
! 31: * The number of columns of the matrix A. N >= 0.
! 32: *
! 33: * KL (input) INTEGER
! 34: * The number of subdiagonals within the band of A. KL >= 0.
! 35: *
! 36: * KU (input) INTEGER
! 37: * The number of superdiagonals within the band of A. KU >= 0.
! 38: *
! 39: * AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
! 40: * On entry, the matrix A in band storage, in rows KL+1 to
! 41: * 2*KL+KU+1; rows 1 to KL of the array need not be set.
! 42: * The j-th column of A is stored in the j-th column of the
! 43: * array AB as follows:
! 44: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
! 45: *
! 46: * On exit, details of the factorization: U is stored as an
! 47: * upper triangular band matrix with KL+KU superdiagonals in
! 48: * rows 1 to KL+KU+1, and the multipliers used during the
! 49: * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
! 50: * See below for further details.
! 51: *
! 52: * LDAB (input) INTEGER
! 53: * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
! 54: *
! 55: * IPIV (output) INTEGER array, dimension (min(M,N))
! 56: * The pivot indices; for 1 <= i <= min(M,N), row i of the
! 57: * matrix was interchanged with row IPIV(i).
! 58: *
! 59: * INFO (output) INTEGER
! 60: * = 0: successful exit
! 61: * < 0: if INFO = -i, the i-th argument had an illegal value
! 62: * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
! 63: * has been completed, but the factor U is exactly
! 64: * singular, and division by zero will occur if it is used
! 65: * to solve a system of equations.
! 66: *
! 67: * Further Details
! 68: * ===============
! 69: *
! 70: * The band storage scheme is illustrated by the following example, when
! 71: * M = N = 6, KL = 2, KU = 1:
! 72: *
! 73: * On entry: On exit:
! 74: *
! 75: * * * * + + + * * * u14 u25 u36
! 76: * * * + + + + * * u13 u24 u35 u46
! 77: * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
! 78: * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
! 79: * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
! 80: * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
! 81: *
! 82: * Array elements marked * are not used by the routine; elements marked
! 83: * + need not be set on entry, but are required by the routine to store
! 84: * elements of U because of fill-in resulting from the row interchanges.
! 85: *
! 86: * =====================================================================
! 87: *
! 88: * .. Parameters ..
! 89: DOUBLE PRECISION ONE, ZERO
! 90: PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
! 91: INTEGER NBMAX, LDWORK
! 92: PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
! 93: * ..
! 94: * .. Local Scalars ..
! 95: INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
! 96: $ JU, K2, KM, KV, NB, NW
! 97: DOUBLE PRECISION TEMP
! 98: * ..
! 99: * .. Local Arrays ..
! 100: DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
! 101: $ WORK31( LDWORK, NBMAX )
! 102: * ..
! 103: * .. External Functions ..
! 104: INTEGER IDAMAX, ILAENV
! 105: EXTERNAL IDAMAX, ILAENV
! 106: * ..
! 107: * .. External Subroutines ..
! 108: EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
! 109: $ DSWAP, DTRSM, XERBLA
! 110: * ..
! 111: * .. Intrinsic Functions ..
! 112: INTRINSIC MAX, MIN
! 113: * ..
! 114: * .. Executable Statements ..
! 115: *
! 116: * KV is the number of superdiagonals in the factor U, allowing for
! 117: * fill-in
! 118: *
! 119: KV = KU + KL
! 120: *
! 121: * Test the input parameters.
! 122: *
! 123: INFO = 0
! 124: IF( M.LT.0 ) THEN
! 125: INFO = -1
! 126: ELSE IF( N.LT.0 ) THEN
! 127: INFO = -2
! 128: ELSE IF( KL.LT.0 ) THEN
! 129: INFO = -3
! 130: ELSE IF( KU.LT.0 ) THEN
! 131: INFO = -4
! 132: ELSE IF( LDAB.LT.KL+KV+1 ) THEN
! 133: INFO = -6
! 134: END IF
! 135: IF( INFO.NE.0 ) THEN
! 136: CALL XERBLA( 'DGBTRF', -INFO )
! 137: RETURN
! 138: END IF
! 139: *
! 140: * Quick return if possible
! 141: *
! 142: IF( M.EQ.0 .OR. N.EQ.0 )
! 143: $ RETURN
! 144: *
! 145: * Determine the block size for this environment
! 146: *
! 147: NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
! 148: *
! 149: * The block size must not exceed the limit set by the size of the
! 150: * local arrays WORK13 and WORK31.
! 151: *
! 152: NB = MIN( NB, NBMAX )
! 153: *
! 154: IF( NB.LE.1 .OR. NB.GT.KL ) THEN
! 155: *
! 156: * Use unblocked code
! 157: *
! 158: CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
! 159: ELSE
! 160: *
! 161: * Use blocked code
! 162: *
! 163: * Zero the superdiagonal elements of the work array WORK13
! 164: *
! 165: DO 20 J = 1, NB
! 166: DO 10 I = 1, J - 1
! 167: WORK13( I, J ) = ZERO
! 168: 10 CONTINUE
! 169: 20 CONTINUE
! 170: *
! 171: * Zero the subdiagonal elements of the work array WORK31
! 172: *
! 173: DO 40 J = 1, NB
! 174: DO 30 I = J + 1, NB
! 175: WORK31( I, J ) = ZERO
! 176: 30 CONTINUE
! 177: 40 CONTINUE
! 178: *
! 179: * Gaussian elimination with partial pivoting
! 180: *
! 181: * Set fill-in elements in columns KU+2 to KV to zero
! 182: *
! 183: DO 60 J = KU + 2, MIN( KV, N )
! 184: DO 50 I = KV - J + 2, KL
! 185: AB( I, J ) = ZERO
! 186: 50 CONTINUE
! 187: 60 CONTINUE
! 188: *
! 189: * JU is the index of the last column affected by the current
! 190: * stage of the factorization
! 191: *
! 192: JU = 1
! 193: *
! 194: DO 180 J = 1, MIN( M, N ), NB
! 195: JB = MIN( NB, MIN( M, N )-J+1 )
! 196: *
! 197: * The active part of the matrix is partitioned
! 198: *
! 199: * A11 A12 A13
! 200: * A21 A22 A23
! 201: * A31 A32 A33
! 202: *
! 203: * Here A11, A21 and A31 denote the current block of JB columns
! 204: * which is about to be factorized. The number of rows in the
! 205: * partitioning are JB, I2, I3 respectively, and the numbers
! 206: * of columns are JB, J2, J3. The superdiagonal elements of A13
! 207: * and the subdiagonal elements of A31 lie outside the band.
! 208: *
! 209: I2 = MIN( KL-JB, M-J-JB+1 )
! 210: I3 = MIN( JB, M-J-KL+1 )
! 211: *
! 212: * J2 and J3 are computed after JU has been updated.
! 213: *
! 214: * Factorize the current block of JB columns
! 215: *
! 216: DO 80 JJ = J, J + JB - 1
! 217: *
! 218: * Set fill-in elements in column JJ+KV to zero
! 219: *
! 220: IF( JJ+KV.LE.N ) THEN
! 221: DO 70 I = 1, KL
! 222: AB( I, JJ+KV ) = ZERO
! 223: 70 CONTINUE
! 224: END IF
! 225: *
! 226: * Find pivot and test for singularity. KM is the number of
! 227: * subdiagonal elements in the current column.
! 228: *
! 229: KM = MIN( KL, M-JJ )
! 230: JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
! 231: IPIV( JJ ) = JP + JJ - J
! 232: IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
! 233: JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
! 234: IF( JP.NE.1 ) THEN
! 235: *
! 236: * Apply interchange to columns J to J+JB-1
! 237: *
! 238: IF( JP+JJ-1.LT.J+KL ) THEN
! 239: *
! 240: CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
! 241: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
! 242: ELSE
! 243: *
! 244: * The interchange affects columns J to JJ-1 of A31
! 245: * which are stored in the work array WORK31
! 246: *
! 247: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 248: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
! 249: CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
! 250: $ AB( KV+JP, JJ ), LDAB-1 )
! 251: END IF
! 252: END IF
! 253: *
! 254: * Compute multipliers
! 255: *
! 256: CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
! 257: $ 1 )
! 258: *
! 259: * Update trailing submatrix within the band and within
! 260: * the current block. JM is the index of the last column
! 261: * which needs to be updated.
! 262: *
! 263: JM = MIN( JU, J+JB-1 )
! 264: IF( JM.GT.JJ )
! 265: $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
! 266: $ AB( KV, JJ+1 ), LDAB-1,
! 267: $ AB( KV+1, JJ+1 ), LDAB-1 )
! 268: ELSE
! 269: *
! 270: * If pivot is zero, set INFO to the index of the pivot
! 271: * unless a zero pivot has already been found.
! 272: *
! 273: IF( INFO.EQ.0 )
! 274: $ INFO = JJ
! 275: END IF
! 276: *
! 277: * Copy current column of A31 into the work array WORK31
! 278: *
! 279: NW = MIN( JJ-J+1, I3 )
! 280: IF( NW.GT.0 )
! 281: $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
! 282: $ WORK31( 1, JJ-J+1 ), 1 )
! 283: 80 CONTINUE
! 284: IF( J+JB.LE.N ) THEN
! 285: *
! 286: * Apply the row interchanges to the other blocks.
! 287: *
! 288: J2 = MIN( JU-J+1, KV ) - JB
! 289: J3 = MAX( 0, JU-J-KV+1 )
! 290: *
! 291: * Use DLASWP to apply the row interchanges to A12, A22, and
! 292: * A32.
! 293: *
! 294: CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
! 295: $ IPIV( J ), 1 )
! 296: *
! 297: * Adjust the pivot indices.
! 298: *
! 299: DO 90 I = J, J + JB - 1
! 300: IPIV( I ) = IPIV( I ) + J - 1
! 301: 90 CONTINUE
! 302: *
! 303: * Apply the row interchanges to A13, A23, and A33
! 304: * columnwise.
! 305: *
! 306: K2 = J - 1 + JB + J2
! 307: DO 110 I = 1, J3
! 308: JJ = K2 + I
! 309: DO 100 II = J + I - 1, J + JB - 1
! 310: IP = IPIV( II )
! 311: IF( IP.NE.II ) THEN
! 312: TEMP = AB( KV+1+II-JJ, JJ )
! 313: AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
! 314: AB( KV+1+IP-JJ, JJ ) = TEMP
! 315: END IF
! 316: 100 CONTINUE
! 317: 110 CONTINUE
! 318: *
! 319: * Update the relevant part of the trailing submatrix
! 320: *
! 321: IF( J2.GT.0 ) THEN
! 322: *
! 323: * Update A12
! 324: *
! 325: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
! 326: $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
! 327: $ AB( KV+1-JB, J+JB ), LDAB-1 )
! 328: *
! 329: IF( I2.GT.0 ) THEN
! 330: *
! 331: * Update A22
! 332: *
! 333: CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
! 334: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
! 335: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
! 336: $ AB( KV+1, J+JB ), LDAB-1 )
! 337: END IF
! 338: *
! 339: IF( I3.GT.0 ) THEN
! 340: *
! 341: * Update A32
! 342: *
! 343: CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
! 344: $ JB, -ONE, WORK31, LDWORK,
! 345: $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
! 346: $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
! 347: END IF
! 348: END IF
! 349: *
! 350: IF( J3.GT.0 ) THEN
! 351: *
! 352: * Copy the lower triangle of A13 into the work array
! 353: * WORK13
! 354: *
! 355: DO 130 JJ = 1, J3
! 356: DO 120 II = JJ, JB
! 357: WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
! 358: 120 CONTINUE
! 359: 130 CONTINUE
! 360: *
! 361: * Update A13 in the work array
! 362: *
! 363: CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
! 364: $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
! 365: $ WORK13, LDWORK )
! 366: *
! 367: IF( I2.GT.0 ) THEN
! 368: *
! 369: * Update A23
! 370: *
! 371: CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
! 372: $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
! 373: $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
! 374: $ LDAB-1 )
! 375: END IF
! 376: *
! 377: IF( I3.GT.0 ) THEN
! 378: *
! 379: * Update A33
! 380: *
! 381: CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
! 382: $ JB, -ONE, WORK31, LDWORK, WORK13,
! 383: $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
! 384: END IF
! 385: *
! 386: * Copy the lower triangle of A13 back into place
! 387: *
! 388: DO 150 JJ = 1, J3
! 389: DO 140 II = JJ, JB
! 390: AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
! 391: 140 CONTINUE
! 392: 150 CONTINUE
! 393: END IF
! 394: ELSE
! 395: *
! 396: * Adjust the pivot indices.
! 397: *
! 398: DO 160 I = J, J + JB - 1
! 399: IPIV( I ) = IPIV( I ) + J - 1
! 400: 160 CONTINUE
! 401: END IF
! 402: *
! 403: * Partially undo the interchanges in the current block to
! 404: * restore the upper triangular form of A31 and copy the upper
! 405: * triangle of A31 back into place
! 406: *
! 407: DO 170 JJ = J + JB - 1, J, -1
! 408: JP = IPIV( JJ ) - JJ + 1
! 409: IF( JP.NE.1 ) THEN
! 410: *
! 411: * Apply interchange to columns J to JJ-1
! 412: *
! 413: IF( JP+JJ-1.LT.J+KL ) THEN
! 414: *
! 415: * The interchange does not affect A31
! 416: *
! 417: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 418: $ AB( KV+JP+JJ-J, J ), LDAB-1 )
! 419: ELSE
! 420: *
! 421: * The interchange does affect A31
! 422: *
! 423: CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
! 424: $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
! 425: END IF
! 426: END IF
! 427: *
! 428: * Copy the current column of A31 back into place
! 429: *
! 430: NW = MIN( I3, JJ-J+1 )
! 431: IF( NW.GT.0 )
! 432: $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
! 433: $ AB( KV+KL+1-JJ+J, JJ ), 1 )
! 434: 170 CONTINUE
! 435: 180 CONTINUE
! 436: END IF
! 437: *
! 438: RETURN
! 439: *
! 440: * End of DGBTRF
! 441: *
! 442: END
CVSweb interface <joel.bertrand@systella.fr>