File:  [local] / rpl / lapack / lapack / zgbtrf.f
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:46 2010 UTC (14 years, 3 months ago) by bertrand
Branches: MAIN
CVS tags: HEAD
Initial revision

    1:       SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
    2: *
    3: *  -- LAPACK routine (version 3.2) --
    4: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
    5: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
    6: *     November 2006
    7: *
    8: *     .. Scalar Arguments ..
    9:       INTEGER            INFO, KL, KU, LDAB, M, N
   10: *     ..
   11: *     .. Array Arguments ..
   12:       INTEGER            IPIV( * )
   13:       COMPLEX*16         AB( LDAB, * )
   14: *     ..
   15: *
   16: *  Purpose
   17: *  =======
   18: *
   19: *  ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
   20: *  using partial pivoting with row interchanges.
   21: *
   22: *  This is the blocked version of the algorithm, calling Level 3 BLAS.
   23: *
   24: *  Arguments
   25: *  =========
   26: *
   27: *  M       (input) INTEGER
   28: *          The number of rows of the matrix A.  M >= 0.
   29: *
   30: *  N       (input) INTEGER
   31: *          The number of columns of the matrix A.  N >= 0.
   32: *
   33: *  KL      (input) INTEGER
   34: *          The number of subdiagonals within the band of A.  KL >= 0.
   35: *
   36: *  KU      (input) INTEGER
   37: *          The number of superdiagonals within the band of A.  KU >= 0.
   38: *
   39: *  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
   40: *          On entry, the matrix A in band storage, in rows KL+1 to
   41: *          2*KL+KU+1; rows 1 to KL of the array need not be set.
   42: *          The j-th column of A is stored in the j-th column of the
   43: *          array AB as follows:
   44: *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
   45: *
   46: *          On exit, details of the factorization: U is stored as an
   47: *          upper triangular band matrix with KL+KU superdiagonals in
   48: *          rows 1 to KL+KU+1, and the multipliers used during the
   49: *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
   50: *          See below for further details.
   51: *
   52: *  LDAB    (input) INTEGER
   53: *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
   54: *
   55: *  IPIV    (output) INTEGER array, dimension (min(M,N))
   56: *          The pivot indices; for 1 <= i <= min(M,N), row i of the
   57: *          matrix was interchanged with row IPIV(i).
   58: *
   59: *  INFO    (output) INTEGER
   60: *          = 0: successful exit
   61: *          < 0: if INFO = -i, the i-th argument had an illegal value
   62: *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
   63: *               has been completed, but the factor U is exactly
   64: *               singular, and division by zero will occur if it is used
   65: *               to solve a system of equations.
   66: *
   67: *  Further Details
   68: *  ===============
   69: *
   70: *  The band storage scheme is illustrated by the following example, when
   71: *  M = N = 6, KL = 2, KU = 1:
   72: *
   73: *  On entry:                       On exit:
   74: *
   75: *      *    *    *    +    +    +       *    *    *   u14  u25  u36
   76: *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
   77: *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
   78: *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
   79: *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
   80: *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
   81: *
   82: *  Array elements marked * are not used by the routine; elements marked
   83: *  + need not be set on entry, but are required by the routine to store
   84: *  elements of U because of fill-in resulting from the row interchanges.
   85: *
   86: *  =====================================================================
   87: *
   88: *     .. Parameters ..
   89:       COMPLEX*16         ONE, ZERO
   90:       PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
   91:      $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
   92:       INTEGER            NBMAX, LDWORK
   93:       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
   94: *     ..
   95: *     .. Local Scalars ..
   96:       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
   97:      $                   JU, K2, KM, KV, NB, NW
   98:       COMPLEX*16         TEMP
   99: *     ..
  100: *     .. Local Arrays ..
  101:       COMPLEX*16         WORK13( LDWORK, NBMAX ),
  102:      $                   WORK31( LDWORK, NBMAX )
  103: *     ..
  104: *     .. External Functions ..
  105:       INTEGER            ILAENV, IZAMAX
  106:       EXTERNAL           ILAENV, IZAMAX
  107: *     ..
  108: *     .. External Subroutines ..
  109:       EXTERNAL           XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
  110:      $                   ZSCAL, ZSWAP, ZTRSM
  111: *     ..
  112: *     .. Intrinsic Functions ..
  113:       INTRINSIC          MAX, MIN
  114: *     ..
  115: *     .. Executable Statements ..
  116: *
  117: *     KV is the number of superdiagonals in the factor U, allowing for
  118: *     fill-in
  119: *
  120:       KV = KU + KL
  121: *
  122: *     Test the input parameters.
  123: *
  124:       INFO = 0
  125:       IF( M.LT.0 ) THEN
  126:          INFO = -1
  127:       ELSE IF( N.LT.0 ) THEN
  128:          INFO = -2
  129:       ELSE IF( KL.LT.0 ) THEN
  130:          INFO = -3
  131:       ELSE IF( KU.LT.0 ) THEN
  132:          INFO = -4
  133:       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
  134:          INFO = -6
  135:       END IF
  136:       IF( INFO.NE.0 ) THEN
  137:          CALL XERBLA( 'ZGBTRF', -INFO )
  138:          RETURN
  139:       END IF
  140: *
  141: *     Quick return if possible
  142: *
  143:       IF( M.EQ.0 .OR. N.EQ.0 )
  144:      $   RETURN
  145: *
  146: *     Determine the block size for this environment
  147: *
  148:       NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
  149: *
  150: *     The block size must not exceed the limit set by the size of the
  151: *     local arrays WORK13 and WORK31.
  152: *
  153:       NB = MIN( NB, NBMAX )
  154: *
  155:       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
  156: *
  157: *        Use unblocked code
  158: *
  159:          CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  160:       ELSE
  161: *
  162: *        Use blocked code
  163: *
  164: *        Zero the superdiagonal elements of the work array WORK13
  165: *
  166:          DO 20 J = 1, NB
  167:             DO 10 I = 1, J - 1
  168:                WORK13( I, J ) = ZERO
  169:    10       CONTINUE
  170:    20    CONTINUE
  171: *
  172: *        Zero the subdiagonal elements of the work array WORK31
  173: *
  174:          DO 40 J = 1, NB
  175:             DO 30 I = J + 1, NB
  176:                WORK31( I, J ) = ZERO
  177:    30       CONTINUE
  178:    40    CONTINUE
  179: *
  180: *        Gaussian elimination with partial pivoting
  181: *
  182: *        Set fill-in elements in columns KU+2 to KV to zero
  183: *
  184:          DO 60 J = KU + 2, MIN( KV, N )
  185:             DO 50 I = KV - J + 2, KL
  186:                AB( I, J ) = ZERO
  187:    50       CONTINUE
  188:    60    CONTINUE
  189: *
  190: *        JU is the index of the last column affected by the current
  191: *        stage of the factorization
  192: *
  193:          JU = 1
  194: *
  195:          DO 180 J = 1, MIN( M, N ), NB
  196:             JB = MIN( NB, MIN( M, N )-J+1 )
  197: *
  198: *           The active part of the matrix is partitioned
  199: *
  200: *              A11   A12   A13
  201: *              A21   A22   A23
  202: *              A31   A32   A33
  203: *
  204: *           Here A11, A21 and A31 denote the current block of JB columns
  205: *           which is about to be factorized. The number of rows in the
  206: *           partitioning are JB, I2, I3 respectively, and the numbers
  207: *           of columns are JB, J2, J3. The superdiagonal elements of A13
  208: *           and the subdiagonal elements of A31 lie outside the band.
  209: *
  210:             I2 = MIN( KL-JB, M-J-JB+1 )
  211:             I3 = MIN( JB, M-J-KL+1 )
  212: *
  213: *           J2 and J3 are computed after JU has been updated.
  214: *
  215: *           Factorize the current block of JB columns
  216: *
  217:             DO 80 JJ = J, J + JB - 1
  218: *
  219: *              Set fill-in elements in column JJ+KV to zero
  220: *
  221:                IF( JJ+KV.LE.N ) THEN
  222:                   DO 70 I = 1, KL
  223:                      AB( I, JJ+KV ) = ZERO
  224:    70             CONTINUE
  225:                END IF
  226: *
  227: *              Find pivot and test for singularity. KM is the number of
  228: *              subdiagonal elements in the current column.
  229: *
  230:                KM = MIN( KL, M-JJ )
  231:                JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
  232:                IPIV( JJ ) = JP + JJ - J
  233:                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
  234:                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
  235:                   IF( JP.NE.1 ) THEN
  236: *
  237: *                    Apply interchange to columns J to J+JB-1
  238: *
  239:                      IF( JP+JJ-1.LT.J+KL ) THEN
  240: *
  241:                         CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
  242:      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
  243:                      ELSE
  244: *
  245: *                       The interchange affects columns J to JJ-1 of A31
  246: *                       which are stored in the work array WORK31
  247: *
  248:                         CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  249:      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  250:                         CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
  251:      $                              AB( KV+JP, JJ ), LDAB-1 )
  252:                      END IF
  253:                   END IF
  254: *
  255: *                 Compute multipliers
  256: *
  257:                   CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
  258:      $                        1 )
  259: *
  260: *                 Update trailing submatrix within the band and within
  261: *                 the current block. JM is the index of the last column
  262: *                 which needs to be updated.
  263: *
  264:                   JM = MIN( JU, J+JB-1 )
  265:                   IF( JM.GT.JJ )
  266:      $               CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
  267:      $                           AB( KV, JJ+1 ), LDAB-1,
  268:      $                           AB( KV+1, JJ+1 ), LDAB-1 )
  269:                ELSE
  270: *
  271: *                 If pivot is zero, set INFO to the index of the pivot
  272: *                 unless a zero pivot has already been found.
  273: *
  274:                   IF( INFO.EQ.0 )
  275:      $               INFO = JJ
  276:                END IF
  277: *
  278: *              Copy current column of A31 into the work array WORK31
  279: *
  280:                NW = MIN( JJ-J+1, I3 )
  281:                IF( NW.GT.0 )
  282:      $            CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
  283:      $                        WORK31( 1, JJ-J+1 ), 1 )
  284:    80       CONTINUE
  285:             IF( J+JB.LE.N ) THEN
  286: *
  287: *              Apply the row interchanges to the other blocks.
  288: *
  289:                J2 = MIN( JU-J+1, KV ) - JB
  290:                J3 = MAX( 0, JU-J-KV+1 )
  291: *
  292: *              Use ZLASWP to apply the row interchanges to A12, A22, and
  293: *              A32.
  294: *
  295:                CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
  296:      $                      IPIV( J ), 1 )
  297: *
  298: *              Adjust the pivot indices.
  299: *
  300:                DO 90 I = J, J + JB - 1
  301:                   IPIV( I ) = IPIV( I ) + J - 1
  302:    90          CONTINUE
  303: *
  304: *              Apply the row interchanges to A13, A23, and A33
  305: *              columnwise.
  306: *
  307:                K2 = J - 1 + JB + J2
  308:                DO 110 I = 1, J3
  309:                   JJ = K2 + I
  310:                   DO 100 II = J + I - 1, J + JB - 1
  311:                      IP = IPIV( II )
  312:                      IF( IP.NE.II ) THEN
  313:                         TEMP = AB( KV+1+II-JJ, JJ )
  314:                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
  315:                         AB( KV+1+IP-JJ, JJ ) = TEMP
  316:                      END IF
  317:   100             CONTINUE
  318:   110          CONTINUE
  319: *
  320: *              Update the relevant part of the trailing submatrix
  321: *
  322:                IF( J2.GT.0 ) THEN
  323: *
  324: *                 Update A12
  325: *
  326:                   CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  327:      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
  328:      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
  329: *
  330:                   IF( I2.GT.0 ) THEN
  331: *
  332: *                    Update A22
  333: *
  334:                      CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
  335:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  336:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  337:      $                           AB( KV+1, J+JB ), LDAB-1 )
  338:                   END IF
  339: *
  340:                   IF( I3.GT.0 ) THEN
  341: *
  342: *                    Update A32
  343: *
  344:                      CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
  345:      $                           JB, -ONE, WORK31, LDWORK,
  346:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  347:      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
  348:                   END IF
  349:                END IF
  350: *
  351:                IF( J3.GT.0 ) THEN
  352: *
  353: *                 Copy the lower triangle of A13 into the work array
  354: *                 WORK13
  355: *
  356:                   DO 130 JJ = 1, J3
  357:                      DO 120 II = JJ, JB
  358:                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  359:   120                CONTINUE
  360:   130             CONTINUE
  361: *
  362: *                 Update A13 in the work array
  363: *
  364:                   CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  365:      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
  366:      $                        WORK13, LDWORK )
  367: *
  368:                   IF( I2.GT.0 ) THEN
  369: *
  370: *                    Update A23
  371: *
  372:                      CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
  373:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  374:      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
  375:      $                           LDAB-1 )
  376:                   END IF
  377: *
  378:                   IF( I3.GT.0 ) THEN
  379: *
  380: *                    Update A33
  381: *
  382:                      CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
  383:      $                           JB, -ONE, WORK31, LDWORK, WORK13,
  384:      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
  385:                   END IF
  386: *
  387: *                 Copy the lower triangle of A13 back into place
  388: *
  389:                   DO 150 JJ = 1, J3
  390:                      DO 140 II = JJ, JB
  391:                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  392:   140                CONTINUE
  393:   150             CONTINUE
  394:                END IF
  395:             ELSE
  396: *
  397: *              Adjust the pivot indices.
  398: *
  399:                DO 160 I = J, J + JB - 1
  400:                   IPIV( I ) = IPIV( I ) + J - 1
  401:   160          CONTINUE
  402:             END IF
  403: *
  404: *           Partially undo the interchanges in the current block to
  405: *           restore the upper triangular form of A31 and copy the upper
  406: *           triangle of A31 back into place
  407: *
  408:             DO 170 JJ = J + JB - 1, J, -1
  409:                JP = IPIV( JJ ) - JJ + 1
  410:                IF( JP.NE.1 ) THEN
  411: *
  412: *                 Apply interchange to columns J to JJ-1
  413: *
  414:                   IF( JP+JJ-1.LT.J+KL ) THEN
  415: *
  416: *                    The interchange does not affect A31
  417: *
  418:                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  419:      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
  420:                   ELSE
  421: *
  422: *                    The interchange does affect A31
  423: *
  424:                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  425:      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  426:                   END IF
  427:                END IF
  428: *
  429: *              Copy the current column of A31 back into place
  430: *
  431:                NW = MIN( I3, JJ-J+1 )
  432:                IF( NW.GT.0 )
  433:      $            CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
  434:      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
  435:   170       CONTINUE
  436:   180    CONTINUE
  437:       END IF
  438: *
  439:       RETURN
  440: *
  441: *     End of ZGBTRF
  442: *
  443:       END

CVSweb interface <joel.bertrand@systella.fr>