File:  [local] / rpl / lapack / lapack / dgbtrf.f
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:45 2010 UTC (14 years, 3 months ago) by bertrand
Branches: JKB
CVS tags: start, rpl-4_0_14, rpl-4_0_13, rpl-4_0_12, rpl-4_0_11, rpl-4_0_10


Commit initial.

    1:       SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
    2: *
    3: *  -- LAPACK routine (version 3.2) --
    4: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
    5: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
    6: *     November 2006
    7: *
    8: *     .. Scalar Arguments ..
    9:       INTEGER            INFO, KL, KU, LDAB, M, N
   10: *     ..
   11: *     .. Array Arguments ..
   12:       INTEGER            IPIV( * )
   13:       DOUBLE PRECISION   AB( LDAB, * )
   14: *     ..
   15: *
   16: *  Purpose
   17: *  =======
   18: *
   19: *  DGBTRF computes an LU factorization of a real m-by-n band matrix A
   20: *  using partial pivoting with row interchanges.
   21: *
   22: *  This is the blocked version of the algorithm, calling Level 3 BLAS.
   23: *
   24: *  Arguments
   25: *  =========
   26: *
   27: *  M       (input) INTEGER
   28: *          The number of rows of the matrix A.  M >= 0.
   29: *
   30: *  N       (input) INTEGER
   31: *          The number of columns of the matrix A.  N >= 0.
   32: *
   33: *  KL      (input) INTEGER
   34: *          The number of subdiagonals within the band of A.  KL >= 0.
   35: *
   36: *  KU      (input) INTEGER
   37: *          The number of superdiagonals within the band of A.  KU >= 0.
   38: *
   39: *  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
   40: *          On entry, the matrix A in band storage, in rows KL+1 to
   41: *          2*KL+KU+1; rows 1 to KL of the array need not be set.
   42: *          The j-th column of A is stored in the j-th column of the
   43: *          array AB as follows:
   44: *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
   45: *
   46: *          On exit, details of the factorization: U is stored as an
   47: *          upper triangular band matrix with KL+KU superdiagonals in
   48: *          rows 1 to KL+KU+1, and the multipliers used during the
   49: *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
   50: *          See below for further details.
   51: *
   52: *  LDAB    (input) INTEGER
   53: *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
   54: *
   55: *  IPIV    (output) INTEGER array, dimension (min(M,N))
   56: *          The pivot indices; for 1 <= i <= min(M,N), row i of the
   57: *          matrix was interchanged with row IPIV(i).
   58: *
   59: *  INFO    (output) INTEGER
   60: *          = 0: successful exit
   61: *          < 0: if INFO = -i, the i-th argument had an illegal value
   62: *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
   63: *               has been completed, but the factor U is exactly
   64: *               singular, and division by zero will occur if it is used
   65: *               to solve a system of equations.
   66: *
   67: *  Further Details
   68: *  ===============
   69: *
   70: *  The band storage scheme is illustrated by the following example, when
   71: *  M = N = 6, KL = 2, KU = 1:
   72: *
   73: *  On entry:                       On exit:
   74: *
   75: *      *    *    *    +    +    +       *    *    *   u14  u25  u36
   76: *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
   77: *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
   78: *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
   79: *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
   80: *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
   81: *
   82: *  Array elements marked * are not used by the routine; elements marked
   83: *  + need not be set on entry, but are required by the routine to store
   84: *  elements of U because of fill-in resulting from the row interchanges.
   85: *
   86: *  =====================================================================
   87: *
   88: *     .. Parameters ..
   89:       DOUBLE PRECISION   ONE, ZERO
   90:       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
   91:       INTEGER            NBMAX, LDWORK
   92:       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
   93: *     ..
   94: *     .. Local Scalars ..
   95:       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
   96:      $                   JU, K2, KM, KV, NB, NW
   97:       DOUBLE PRECISION   TEMP
   98: *     ..
   99: *     .. Local Arrays ..
  100:       DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
  101:      $                   WORK31( LDWORK, NBMAX )
  102: *     ..
  103: *     .. External Functions ..
  104:       INTEGER            IDAMAX, ILAENV
  105:       EXTERNAL           IDAMAX, ILAENV
  106: *     ..
  107: *     .. External Subroutines ..
  108:       EXTERNAL           DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
  109:      $                   DSWAP, DTRSM, XERBLA
  110: *     ..
  111: *     .. Intrinsic Functions ..
  112:       INTRINSIC          MAX, MIN
  113: *     ..
  114: *     .. Executable Statements ..
  115: *
  116: *     KV is the number of superdiagonals in the factor U, allowing for
  117: *     fill-in
  118: *
  119:       KV = KU + KL
  120: *
  121: *     Test the input parameters.
  122: *
  123:       INFO = 0
  124:       IF( M.LT.0 ) THEN
  125:          INFO = -1
  126:       ELSE IF( N.LT.0 ) THEN
  127:          INFO = -2
  128:       ELSE IF( KL.LT.0 ) THEN
  129:          INFO = -3
  130:       ELSE IF( KU.LT.0 ) THEN
  131:          INFO = -4
  132:       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
  133:          INFO = -6
  134:       END IF
  135:       IF( INFO.NE.0 ) THEN
  136:          CALL XERBLA( 'DGBTRF', -INFO )
  137:          RETURN
  138:       END IF
  139: *
  140: *     Quick return if possible
  141: *
  142:       IF( M.EQ.0 .OR. N.EQ.0 )
  143:      $   RETURN
  144: *
  145: *     Determine the block size for this environment
  146: *
  147:       NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
  148: *
  149: *     The block size must not exceed the limit set by the size of the
  150: *     local arrays WORK13 and WORK31.
  151: *
  152:       NB = MIN( NB, NBMAX )
  153: *
  154:       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
  155: *
  156: *        Use unblocked code
  157: *
  158:          CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  159:       ELSE
  160: *
  161: *        Use blocked code
  162: *
  163: *        Zero the superdiagonal elements of the work array WORK13
  164: *
  165:          DO 20 J = 1, NB
  166:             DO 10 I = 1, J - 1
  167:                WORK13( I, J ) = ZERO
  168:    10       CONTINUE
  169:    20    CONTINUE
  170: *
  171: *        Zero the subdiagonal elements of the work array WORK31
  172: *
  173:          DO 40 J = 1, NB
  174:             DO 30 I = J + 1, NB
  175:                WORK31( I, J ) = ZERO
  176:    30       CONTINUE
  177:    40    CONTINUE
  178: *
  179: *        Gaussian elimination with partial pivoting
  180: *
  181: *        Set fill-in elements in columns KU+2 to KV to zero
  182: *
  183:          DO 60 J = KU + 2, MIN( KV, N )
  184:             DO 50 I = KV - J + 2, KL
  185:                AB( I, J ) = ZERO
  186:    50       CONTINUE
  187:    60    CONTINUE
  188: *
  189: *        JU is the index of the last column affected by the current
  190: *        stage of the factorization
  191: *
  192:          JU = 1
  193: *
  194:          DO 180 J = 1, MIN( M, N ), NB
  195:             JB = MIN( NB, MIN( M, N )-J+1 )
  196: *
  197: *           The active part of the matrix is partitioned
  198: *
  199: *              A11   A12   A13
  200: *              A21   A22   A23
  201: *              A31   A32   A33
  202: *
  203: *           Here A11, A21 and A31 denote the current block of JB columns
  204: *           which is about to be factorized. The number of rows in the
  205: *           partitioning are JB, I2, I3 respectively, and the numbers
  206: *           of columns are JB, J2, J3. The superdiagonal elements of A13
  207: *           and the subdiagonal elements of A31 lie outside the band.
  208: *
  209:             I2 = MIN( KL-JB, M-J-JB+1 )
  210:             I3 = MIN( JB, M-J-KL+1 )
  211: *
  212: *           J2 and J3 are computed after JU has been updated.
  213: *
  214: *           Factorize the current block of JB columns
  215: *
  216:             DO 80 JJ = J, J + JB - 1
  217: *
  218: *              Set fill-in elements in column JJ+KV to zero
  219: *
  220:                IF( JJ+KV.LE.N ) THEN
  221:                   DO 70 I = 1, KL
  222:                      AB( I, JJ+KV ) = ZERO
  223:    70             CONTINUE
  224:                END IF
  225: *
  226: *              Find pivot and test for singularity. KM is the number of
  227: *              subdiagonal elements in the current column.
  228: *
  229:                KM = MIN( KL, M-JJ )
  230:                JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
  231:                IPIV( JJ ) = JP + JJ - J
  232:                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
  233:                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
  234:                   IF( JP.NE.1 ) THEN
  235: *
  236: *                    Apply interchange to columns J to J+JB-1
  237: *
  238:                      IF( JP+JJ-1.LT.J+KL ) THEN
  239: *
  240:                         CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
  241:      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
  242:                      ELSE
  243: *
  244: *                       The interchange affects columns J to JJ-1 of A31
  245: *                       which are stored in the work array WORK31
  246: *
  247:                         CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  248:      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  249:                         CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
  250:      $                              AB( KV+JP, JJ ), LDAB-1 )
  251:                      END IF
  252:                   END IF
  253: *
  254: *                 Compute multipliers
  255: *
  256:                   CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
  257:      $                        1 )
  258: *
  259: *                 Update trailing submatrix within the band and within
  260: *                 the current block. JM is the index of the last column
  261: *                 which needs to be updated.
  262: *
  263:                   JM = MIN( JU, J+JB-1 )
  264:                   IF( JM.GT.JJ )
  265:      $               CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
  266:      $                          AB( KV, JJ+1 ), LDAB-1,
  267:      $                          AB( KV+1, JJ+1 ), LDAB-1 )
  268:                ELSE
  269: *
  270: *                 If pivot is zero, set INFO to the index of the pivot
  271: *                 unless a zero pivot has already been found.
  272: *
  273:                   IF( INFO.EQ.0 )
  274:      $               INFO = JJ
  275:                END IF
  276: *
  277: *              Copy current column of A31 into the work array WORK31
  278: *
  279:                NW = MIN( JJ-J+1, I3 )
  280:                IF( NW.GT.0 )
  281:      $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
  282:      $                        WORK31( 1, JJ-J+1 ), 1 )
  283:    80       CONTINUE
  284:             IF( J+JB.LE.N ) THEN
  285: *
  286: *              Apply the row interchanges to the other blocks.
  287: *
  288:                J2 = MIN( JU-J+1, KV ) - JB
  289:                J3 = MAX( 0, JU-J-KV+1 )
  290: *
  291: *              Use DLASWP to apply the row interchanges to A12, A22, and
  292: *              A32.
  293: *
  294:                CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
  295:      $                      IPIV( J ), 1 )
  296: *
  297: *              Adjust the pivot indices.
  298: *
  299:                DO 90 I = J, J + JB - 1
  300:                   IPIV( I ) = IPIV( I ) + J - 1
  301:    90          CONTINUE
  302: *
  303: *              Apply the row interchanges to A13, A23, and A33
  304: *              columnwise.
  305: *
  306:                K2 = J - 1 + JB + J2
  307:                DO 110 I = 1, J3
  308:                   JJ = K2 + I
  309:                   DO 100 II = J + I - 1, J + JB - 1
  310:                      IP = IPIV( II )
  311:                      IF( IP.NE.II ) THEN
  312:                         TEMP = AB( KV+1+II-JJ, JJ )
  313:                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
  314:                         AB( KV+1+IP-JJ, JJ ) = TEMP
  315:                      END IF
  316:   100             CONTINUE
  317:   110          CONTINUE
  318: *
  319: *              Update the relevant part of the trailing submatrix
  320: *
  321:                IF( J2.GT.0 ) THEN
  322: *
  323: *                 Update A12
  324: *
  325:                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  326:      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
  327:      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
  328: *
  329:                   IF( I2.GT.0 ) THEN
  330: *
  331: *                    Update A22
  332: *
  333:                      CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
  334:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  335:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  336:      $                           AB( KV+1, J+JB ), LDAB-1 )
  337:                   END IF
  338: *
  339:                   IF( I3.GT.0 ) THEN
  340: *
  341: *                    Update A32
  342: *
  343:                      CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
  344:      $                           JB, -ONE, WORK31, LDWORK,
  345:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  346:      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
  347:                   END IF
  348:                END IF
  349: *
  350:                IF( J3.GT.0 ) THEN
  351: *
  352: *                 Copy the lower triangle of A13 into the work array
  353: *                 WORK13
  354: *
  355:                   DO 130 JJ = 1, J3
  356:                      DO 120 II = JJ, JB
  357:                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  358:   120                CONTINUE
  359:   130             CONTINUE
  360: *
  361: *                 Update A13 in the work array
  362: *
  363:                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  364:      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
  365:      $                        WORK13, LDWORK )
  366: *
  367:                   IF( I2.GT.0 ) THEN
  368: *
  369: *                    Update A23
  370: *
  371:                      CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
  372:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  373:      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
  374:      $                           LDAB-1 )
  375:                   END IF
  376: *
  377:                   IF( I3.GT.0 ) THEN
  378: *
  379: *                    Update A33
  380: *
  381:                      CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
  382:      $                           JB, -ONE, WORK31, LDWORK, WORK13,
  383:      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
  384:                   END IF
  385: *
  386: *                 Copy the lower triangle of A13 back into place
  387: *
  388:                   DO 150 JJ = 1, J3
  389:                      DO 140 II = JJ, JB
  390:                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  391:   140                CONTINUE
  392:   150             CONTINUE
  393:                END IF
  394:             ELSE
  395: *
  396: *              Adjust the pivot indices.
  397: *
  398:                DO 160 I = J, J + JB - 1
  399:                   IPIV( I ) = IPIV( I ) + J - 1
  400:   160          CONTINUE
  401:             END IF
  402: *
  403: *           Partially undo the interchanges in the current block to
  404: *           restore the upper triangular form of A31 and copy the upper
  405: *           triangle of A31 back into place
  406: *
  407:             DO 170 JJ = J + JB - 1, J, -1
  408:                JP = IPIV( JJ ) - JJ + 1
  409:                IF( JP.NE.1 ) THEN
  410: *
  411: *                 Apply interchange to columns J to JJ-1
  412: *
  413:                   IF( JP+JJ-1.LT.J+KL ) THEN
  414: *
  415: *                    The interchange does not affect A31
  416: *
  417:                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  418:      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
  419:                   ELSE
  420: *
  421: *                    The interchange does affect A31
  422: *
  423:                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  424:      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  425:                   END IF
  426:                END IF
  427: *
  428: *              Copy the current column of A31 back into place
  429: *
  430:                NW = MIN( I3, JJ-J+1 )
  431:                IF( NW.GT.0 )
  432:      $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
  433:      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
  434:   170       CONTINUE
  435:   180    CONTINUE
  436:       END IF
  437: *
  438:       RETURN
  439: *
  440: *     End of DGBTRF
  441: *
  442:       END

CVSweb interface <joel.bertrand@systella.fr>