rpl/lapack/lapack/dsgesv.f - view

Return to dsgesv.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / dsgesv.f
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Jan 26 15:22:45 2010 UTC (14 years, 3 months ago) by bertrand
Branches: MAIN
CVS tags: HEAD

Initial revision

    1:       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
    2:      +                   SWORK, ITER, INFO )
    3: *
    4: *  -- LAPACK PROTOTYPE driver routine (version 3.2) --
    5: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
    6: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
    7: *     February 2007
    8: *
    9: *     ..
   10: *     .. Scalar Arguments ..
   11:       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
   12: *     ..
   13: *     .. Array Arguments ..
   14:       INTEGER            IPIV( * )
   15:       REAL               SWORK( * )
   16:       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
   17:      +                   X( LDX, * )
   18: *     ..
   19: *
   20: *  Purpose
   21: *  =======
   22: *
   23: *  DSGESV computes the solution to a real system of linear equations
   24: *     A * X = B,
   25: *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
   26: *
   27: *  DSGESV first attempts to factorize the matrix in SINGLE PRECISION
   28: *  and use this factorization within an iterative refinement procedure
   29: *  to produce a solution with DOUBLE PRECISION normwise backward error
   30: *  quality (see below). If the approach fails the method switches to a
   31: *  DOUBLE PRECISION factorization and solve.
   32: *
   33: *  The iterative refinement is not going to be a winning strategy if
   34: *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION
   35: *  performance is too small. A reasonable strategy should take the
   36: *  number of right-hand sides and the size of the matrix into account.
   37: *  This might be done with a call to ILAENV in the future. Up to now, we
   38: *  always try iterative refinement.
   39: *
   40: *  The iterative refinement process is stopped if
   41: *      ITER > ITERMAX
   42: *  or for all the RHS we have:
   43: *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
   44: *  where
   45: *      o ITER is the number of the current iteration in the iterative
   46: *        refinement process
   47: *      o RNRM is the infinity-norm of the residual
   48: *      o XNRM is the infinity-norm of the solution
   49: *      o ANRM is the infinity-operator-norm of the matrix A
   50: *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')
   51: *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
   52: *  respectively.
   53: *
   54: *  Arguments
   55: *  =========
   56: *
   57: *  N       (input) INTEGER
   58: *          The number of linear equations, i.e., the order of the
   59: *          matrix A.  N >= 0.
   60: *
   61: *  NRHS    (input) INTEGER
   62: *          The number of right hand sides, i.e., the number of columns
   63: *          of the matrix B.  NRHS >= 0.
   64: *
   65: *  A       (input or input/ouptut) DOUBLE PRECISION array,
   66: *          dimension (LDA,N)
   67: *          On entry, the N-by-N coefficient matrix A.
   68: *          On exit, if iterative refinement has been successfully used
   69: *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
   70: *          unchanged, if double precision factorization has been used
   71: *          (INFO.EQ.0 and ITER.LT.0, see description below), then the
   72: *          array A contains the factors L and U from the factorization
   73: *          A = P*L*U; the unit diagonal elements of L are not stored.
   74: *
   75: *  LDA     (input) INTEGER
   76: *          The leading dimension of the array A.  LDA >= max(1,N).
   77: *
   78: *  IPIV    (output) INTEGER array, dimension (N)
   79: *          The pivot indices that define the permutation matrix P;
   80: *          row i of the matrix was interchanged with row IPIV(i).
   81: *          Corresponds either to the single precision factorization
   82: *          (if INFO.EQ.0 and ITER.GE.0) or the double precision
   83: *          factorization (if INFO.EQ.0 and ITER.LT.0).
   84: *
   85: *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
   86: *          The N-by-NRHS right hand side matrix B.
   87: *
   88: *  LDB     (input) INTEGER
   89: *          The leading dimension of the array B.  LDB >= max(1,N).
   90: *
   91: *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
   92: *          If INFO = 0, the N-by-NRHS solution matrix X.
   93: *
   94: *  LDX     (input) INTEGER
   95: *          The leading dimension of the array X.  LDX >= max(1,N).
   96: *
   97: *  WORK    (workspace) DOUBLE PRECISION array, dimension (N*NRHS)
   98: *          This array is used to hold the residual vectors.
   99: *
  100: *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))
  101: *          This array is used to use the single precision matrix and the
  102: *          right-hand sides or solutions in single precision.
  103: *
  104: *  ITER    (output) INTEGER
  105: *          < 0: iterative refinement has failed, double precision
  106: *               factorization has been performed
  107: *               -1 : the routine fell back to full precision for
  108: *                    implementation- or machine-specific reasons
  109: *               -2 : narrowing the precision induced an overflow,
  110: *                    the routine fell back to full precision
  111: *               -3 : failure of SGETRF
  112: *               -31: stop the iterative refinement after the 30th
  113: *                    iterations
  114: *          > 0: iterative refinement has been sucessfully used.
  115: *               Returns the number of iterations
  116: *
  117: *  INFO    (output) INTEGER
  118: *          = 0:  successful exit
  119: *          < 0:  if INFO = -i, the i-th argument had an illegal value
  120: *          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
  121: *                exactly zero.  The factorization has been completed,
  122: *                but the factor U is exactly singular, so the solution
  123: *                could not be computed.
  124: *
  125: *  =========
  126: *
  127: *     .. Parameters ..
  128:       LOGICAL            DOITREF
  129:       PARAMETER          ( DOITREF = .TRUE. )
  130: *
  131:       INTEGER            ITERMAX
  132:       PARAMETER          ( ITERMAX = 30 )
  133: *
  134:       DOUBLE PRECISION   BWDMAX
  135:       PARAMETER          ( BWDMAX = 1.0E+00 )
  136: *
  137:       DOUBLE PRECISION   NEGONE, ONE
  138:       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
  139: *
  140: *     .. Local Scalars ..
  141:       INTEGER            I, IITER, PTSA, PTSX
  142:       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
  143: *
  144: *     .. External Subroutines ..
  145:       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,
  146:      +                   SGETRS, XERBLA
  147: *     ..
  148: *     .. External Functions ..
  149:       INTEGER            IDAMAX
  150:       DOUBLE PRECISION   DLAMCH, DLANGE
  151:       EXTERNAL           IDAMAX, DLAMCH, DLANGE
  152: *     ..
  153: *     .. Intrinsic Functions ..
  154:       INTRINSIC          ABS, DBLE, MAX, SQRT
  155: *     ..
  156: *     .. Executable Statements ..
  157: *
  158:       INFO = 0
  159:       ITER = 0
  160: *
  161: *     Test the input parameters.
  162: *
  163:       IF( N.LT.0 ) THEN
  164:          INFO = -1
  165:       ELSE IF( NRHS.LT.0 ) THEN
  166:          INFO = -2
  167:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  168:          INFO = -4
  169:       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
  170:          INFO = -7
  171:       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
  172:          INFO = -9
  173:       END IF
  174:       IF( INFO.NE.0 ) THEN
  175:          CALL XERBLA( 'DSGESV', -INFO )
  176:          RETURN
  177:       END IF
  178: *
  179: *     Quick return if (N.EQ.0).
  180: *
  181:       IF( N.EQ.0 )
  182:      +   RETURN
  183: *
  184: *     Skip single precision iterative refinement if a priori slower
  185: *     than double precision factorization.
  186: *
  187:       IF( .NOT.DOITREF ) THEN
  188:          ITER = -1
  189:          GO TO 40
  190:       END IF
  191: *
  192: *     Compute some constants.
  193: *
  194:       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
  195:       EPS = DLAMCH( 'Epsilon' )
  196:       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
  197: *
  198: *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
  199: *
  200:       PTSA = 1
  201:       PTSX = PTSA + N*N
  202: *
  203: *     Convert B from double precision to single precision and store the
  204: *     result in SX.
  205: *
  206:       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
  207: *
  208:       IF( INFO.NE.0 ) THEN
  209:          ITER = -2
  210:          GO TO 40
  211:       END IF
  212: *
  213: *     Convert A from double precision to single precision and store the
  214: *     result in SA.
  215: *
  216:       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
  217: *
  218:       IF( INFO.NE.0 ) THEN
  219:          ITER = -2
  220:          GO TO 40
  221:       END IF
  222: *
  223: *     Compute the LU factorization of SA.
  224: *
  225:       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
  226: *
  227:       IF( INFO.NE.0 ) THEN
  228:          ITER = -3
  229:          GO TO 40
  230:       END IF
  231: *
  232: *     Solve the system SA*SX = SB.
  233: *
  234:       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
  235:      +             SWORK( PTSX ), N, INFO )
  236: *
  237: *     Convert SX back to double precision
  238: *
  239:       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
  240: *
  241: *     Compute R = B - AX (R is WORK).
  242: *
  243:       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
  244: *
  245:       CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
  246:      +            LDA, X, LDX, ONE, WORK, N )
  247: *
  248: *     Check whether the NRHS normwise backward errors satisfy the
  249: *     stopping criterion. If yes, set ITER=0 and return.
  250: *
  251:       DO I = 1, NRHS
  252:          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
  253:          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
  254:          IF( RNRM.GT.XNRM*CTE )
  255:      +      GO TO 10
  256:       END DO
  257: *
  258: *     If we are here, the NRHS normwise backward errors satisfy the
  259: *     stopping criterion. We are good to exit.
  260: *
  261:       ITER = 0
  262:       RETURN
  263: *
  264:    10 CONTINUE
  265: *
  266:       DO 30 IITER = 1, ITERMAX
  267: *
  268: *        Convert R (in WORK) from double precision to single precision
  269: *        and store the result in SX.
  270: *
  271:          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
  272: *
  273:          IF( INFO.NE.0 ) THEN
  274:             ITER = -2
  275:             GO TO 40
  276:          END IF
  277: *
  278: *        Solve the system SA*SX = SR.
  279: *
  280:          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
  281:      +                SWORK( PTSX ), N, INFO )
  282: *
  283: *        Convert SX back to double precision and update the current
  284: *        iterate.
  285: *
  286:          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
  287: *
  288:          DO I = 1, NRHS
  289:             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
  290:          END DO
  291: *
  292: *        Compute R = B - AX (R is WORK).
  293: *
  294:          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
  295: *
  296:          CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
  297:      +               A, LDA, X, LDX, ONE, WORK, N )
  298: *
  299: *        Check whether the NRHS normwise backward errors satisfy the
  300: *        stopping criterion. If yes, set ITER=IITER>0 and return.
  301: *
  302:          DO I = 1, NRHS
  303:             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
  304:             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
  305:             IF( RNRM.GT.XNRM*CTE )
  306:      +         GO TO 20
  307:          END DO
  308: *
  309: *        If we are here, the NRHS normwise backward errors satisfy the
  310: *        stopping criterion, we are good to exit.
  311: *
  312:          ITER = IITER
  313: *
  314:          RETURN
  315: *
  316:    20    CONTINUE
  317: *
  318:    30 CONTINUE
  319: *
  320: *     If we are at this place of the code, this is because we have
  321: *     performed ITER=ITERMAX iterations and never satisified the
  322: *     stopping criterion, set up the ITER flag accordingly and follow up
  323: *     on double precision routine.
  324: *
  325:       ITER = -ITERMAX - 1
  326: *
  327:    40 CONTINUE
  328: *
  329: *     Single-precision iterative refinement failed to converge to a
  330: *     satisfactory solution, so we resort to double precision.
  331: *
  332:       CALL DGETRF( N, N, A, LDA, IPIV, INFO )
  333: *
  334:       IF( INFO.NE.0 )
  335:      +   RETURN
  336: *
  337:       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
  338:       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
  339:      +             INFO )
  340: *
  341:       RETURN
  342: *
  343: *     End of DSGESV.
  344: *
  345:       END

CVSweb interface <joel.bertrand@systella.fr>