File:  [local] / rpl / lapack / lapack / zlatrs3.f
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:55:31 2023 UTC (8 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD
Ajout de fichiers de lapack 3.11

    1: *> \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
    2: *
    3: *  Definition:
    4: *  ===========
    5: *
    6: *      SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
    7: *                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
    8: *
    9: *       .. Scalar Arguments ..
   10: *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
   11: *       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
   12: *       ..
   13: *       .. Array Arguments ..
   14: *       DOUBLE PRECISION   CNORM( * ), SCALE( * ), WORK( * )
   15: *       COMPLEX*16         A( LDA, * ), X( LDX, * )
   16: *       ..
   17: *
   18: *
   19: *> \par Purpose:
   20: *  =============
   21: *>
   22: *> \verbatim
   23: *>
   24: *> ZLATRS3 solves one of the triangular systems
   25: *>
   26: *>    A * X = B * diag(scale),  A**T * X = B * diag(scale), or
   27: *>    A**H * X = B * diag(scale)
   28: *>
   29: *> with scaling to prevent overflow.  Here A is an upper or lower
   30: *> triangular matrix, A**T denotes the transpose of A, A**H denotes the
   31: *> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
   32: *> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
   33: *> is usually less than or equal to 1, chosen such that X(:,j) is less
   34: *> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
   35: *> for some j), then a non-trivial solution to A*X = 0 is returned. If
   36: *> the system is so badly scaled that the solution cannot be represented
   37: *> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
   38: *>
   39: *> This is a BLAS-3 version of LATRS for solving several right
   40: *> hand sides simultaneously.
   41: *>
   42: *> \endverbatim
   43: *
   44: *  Arguments:
   45: *  ==========
   46: *
   47: *> \param[in] UPLO
   48: *> \verbatim
   49: *>          UPLO is CHARACTER*1
   50: *>          Specifies whether the matrix A is upper or lower triangular.
   51: *>          = 'U':  Upper triangular
   52: *>          = 'L':  Lower triangular
   53: *> \endverbatim
   54: *>
   55: *> \param[in] TRANS
   56: *> \verbatim
   57: *>          TRANS is CHARACTER*1
   58: *>          Specifies the operation applied to A.
   59: *>          = 'N':  Solve A * x = s*b  (No transpose)
   60: *>          = 'T':  Solve A**T* x = s*b  (Transpose)
   61: *>          = 'C':  Solve A**T* x = s*b  (Conjugate transpose)
   62: *> \endverbatim
   63: *>
   64: *> \param[in] DIAG
   65: *> \verbatim
   66: *>          DIAG is CHARACTER*1
   67: *>          Specifies whether or not the matrix A is unit triangular.
   68: *>          = 'N':  Non-unit triangular
   69: *>          = 'U':  Unit triangular
   70: *> \endverbatim
   71: *>
   72: *> \param[in] NORMIN
   73: *> \verbatim
   74: *>          NORMIN is CHARACTER*1
   75: *>          Specifies whether CNORM has been set or not.
   76: *>          = 'Y':  CNORM contains the column norms on entry
   77: *>          = 'N':  CNORM is not set on entry.  On exit, the norms will
   78: *>                  be computed and stored in CNORM.
   79: *> \endverbatim
   80: *>
   81: *> \param[in] N
   82: *> \verbatim
   83: *>          N is INTEGER
   84: *>          The order of the matrix A.  N >= 0.
   85: *> \endverbatim
   86: *>
   87: *> \param[in] NRHS
   88: *> \verbatim
   89: *>          NRHS is INTEGER
   90: *>          The number of columns of X.  NRHS >= 0.
   91: *> \endverbatim
   92: *>
   93: *> \param[in] A
   94: *> \verbatim
   95: *>          A is COMPLEX*16 array, dimension (LDA,N)
   96: *>          The triangular matrix A.  If UPLO = 'U', the leading n by n
   97: *>          upper triangular part of the array A contains the upper
   98: *>          triangular matrix, and the strictly lower triangular part of
   99: *>          A is not referenced.  If UPLO = 'L', the leading n by n lower
  100: *>          triangular part of the array A contains the lower triangular
  101: *>          matrix, and the strictly upper triangular part of A is not
  102: *>          referenced.  If DIAG = 'U', the diagonal elements of A are
  103: *>          also not referenced and are assumed to be 1.
  104: *> \endverbatim
  105: *>
  106: *> \param[in] LDA
  107: *> \verbatim
  108: *>          LDA is INTEGER
  109: *>          The leading dimension of the array A.  LDA >= max (1,N).
  110: *> \endverbatim
  111: *>
  112: *> \param[in,out] X
  113: *> \verbatim
  114: *>          X is COMPLEX*16 array, dimension (LDX,NRHS)
  115: *>          On entry, the right hand side B of the triangular system.
  116: *>          On exit, X is overwritten by the solution matrix X.
  117: *> \endverbatim
  118: *>
  119: *> \param[in] LDX
  120: *> \verbatim
  121: *>          LDX is INTEGER
  122: *>          The leading dimension of the array X.  LDX >= max (1,N).
  123: *> \endverbatim
  124: *>
  125: *> \param[out] SCALE
  126: *> \verbatim
  127: *>          SCALE is DOUBLE PRECISION array, dimension (NRHS)
  128: *>          The scaling factor s(k) is for the triangular system
  129: *>          A * x(:,k) = s(k)*b(:,k)  or  A**T* x(:,k) = s(k)*b(:,k).
  130: *>          If SCALE = 0, the matrix A is singular or badly scaled.
  131: *>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
  132: *>          that is an exact or approximate solution to A*x(:,k) = 0
  133: *>          is returned. If the system so badly scaled that solution
  134: *>          cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
  135: *>          is returned.
  136: *> \endverbatim
  137: *>
  138: *> \param[in,out] CNORM
  139: *> \verbatim
  140: *>          CNORM is DOUBLE PRECISION array, dimension (N)
  141: *>
  142: *>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
  143: *>          contains the norm of the off-diagonal part of the j-th column
  144: *>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
  145: *>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
  146: *>          must be greater than or equal to the 1-norm.
  147: *>
  148: *>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
  149: *>          returns the 1-norm of the offdiagonal part of the j-th column
  150: *>          of A.
  151: *> \endverbatim
  152: *>
  153: *> \param[out] WORK
  154: *> \verbatim
  155: *>          WORK is DOUBLE PRECISION array, dimension (LWORK).
  156: *>          On exit, if INFO = 0, WORK(1) returns the optimal size of
  157: *>          WORK.
  158: *> \endverbatim
  159: *>
  160: *> \param[in] LWORK
  161: *>          LWORK is INTEGER
  162: *>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
  163: *>          NBA = (N + NB - 1)/NB and NB is the optimal block size.
  164: *>
  165: *>          If LWORK = -1, then a workspace query is assumed; the routine
  166: *>          only calculates the optimal dimensions of the WORK array, returns
  167: *>          this value as the first entry of the WORK array, and no error
  168: *>          message related to LWORK is issued by XERBLA.
  169: *>
  170: *> \param[out] INFO
  171: *> \verbatim
  172: *>          INFO is INTEGER
  173: *>          = 0:  successful exit
  174: *>          < 0:  if INFO = -k, the k-th argument had an illegal value
  175: *> \endverbatim
  176: *
  177: *  Authors:
  178: *  ========
  179: *
  180: *> \author Univ. of Tennessee
  181: *> \author Univ. of California Berkeley
  182: *> \author Univ. of Colorado Denver
  183: *> \author NAG Ltd.
  184: *
  185: *> \ingroup doubleOTHERauxiliary
  186: *> \par Further Details:
  187: *  =====================
  188: *  \verbatim
  189: *  The algorithm follows the structure of a block triangular solve.
  190: *  The diagonal block is solved with a call to the robust the triangular
  191: *  solver LATRS for every right-hand side RHS = 1, ..., NRHS
  192: *     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
  193: *  where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
  194: *  The linear block updates operate on block columns of X,
  195: *     B( I, K ) - op(A( I, J )) * X( J, K )
  196: *  and use GEMM. To avoid overflow in the linear block update, the worst case
  197: *  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
  198: *  such that
  199: *     || s * B( I, RHS )||_oo
  200: *   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold
  201: *
  202: *  Once all columns of a block column have been rescaled (BLAS-1), the linear
  203: *  update is executed with GEMM without overflow.
  204: *
  205: *  To limit rescaling, local scale factors track the scaling of column segments.
  206: *  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
  207: *  per right-hand side column RHS = 1, ..., NRHS. The global scale factor
  208: *  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
  209: *  I = 1, ..., NBA.
  210: *  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
  211: *  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
  212: *  linear update of potentially inconsistently scaled vector segments
  213: *     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
  214: *  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
  215: *  if necessary, rescales the blocks prior to calling GEMM.
  216: *
  217: *  \endverbatim
  218: *  =====================================================================
  219: *  References:
  220: *  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
  221: *  Parallel robust solution of triangular linear systems. Concurrency
  222: *  and Computation: Practice and Experience, 31(19), e5064.
  223: *
  224: *  Contributor:
  225: *   Angelika Schwarz, Umea University, Sweden.
  226: *
  227: *  =====================================================================
  228:       SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
  229:      $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
  230:       IMPLICIT NONE
  231: *
  232: *     .. Scalar Arguments ..
  233:       CHARACTER          DIAG, TRANS, NORMIN, UPLO
  234:       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
  235: *     ..
  236: *     .. Array Arguments ..
  237:       COMPLEX*16         A( LDA, * ), X( LDX, * )
  238:       DOUBLE PRECISION   CNORM( * ), SCALE( * ), WORK( * )
  239: *     ..
  240: *
  241: *  =====================================================================
  242: *
  243: *     .. Parameters ..
  244:       DOUBLE PRECISION   ZERO, ONE
  245:       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
  246:       COMPLEX*16         CZERO, CONE
  247:       PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
  248:       PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
  249:       INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
  250:       PARAMETER          ( NRHSMIN = 2, NBRHS = 32 )
  251:       PARAMETER          ( NBMIN = 8, NBMAX = 64 )
  252: *     ..
  253: *     .. Local Arrays ..
  254:       DOUBLE PRECISION   W( NBMAX ), XNRM( NBRHS )
  255: *     ..
  256: *     .. Local Scalars ..
  257:       LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
  258:       INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
  259:      $                   JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
  260:      $                   LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
  261:       DOUBLE PRECISION   ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
  262:      $                   SCAMIN, SMLNUM, TMAX
  263: *     ..
  264: *     .. External Functions ..
  265:       LOGICAL            LSAME
  266:       INTEGER            ILAENV
  267:       DOUBLE PRECISION   DLAMCH, ZLANGE, DLARMM
  268:       EXTERNAL           ILAENV, LSAME, DLAMCH, ZLANGE, DLARMM
  269: *     ..
  270: *     .. External Subroutines ..
  271:       EXTERNAL           ZLATRS, ZDSCAL, XERBLA
  272: *     ..
  273: *     .. Intrinsic Functions ..
  274:       INTRINSIC          ABS, MAX, MIN
  275: *     ..
  276: *     .. Executable Statements ..
  277: *
  278:       INFO = 0
  279:       UPPER = LSAME( UPLO, 'U' )
  280:       NOTRAN = LSAME( TRANS, 'N' )
  281:       NOUNIT = LSAME( DIAG, 'N' )
  282:       LQUERY = ( LWORK.EQ.-1 )
  283: *
  284: *     Partition A and X into blocks.
  285: *
  286:       NB = MAX( NBMIN, ILAENV( 1, 'ZLATRS', '', N, N, -1, -1 ) )
  287:       NB = MIN( NBMAX, NB )
  288:       NBA = MAX( 1, (N + NB - 1) / NB )
  289:       NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
  290: *
  291: *     Compute the workspace
  292: *
  293: *     The workspace comprises two parts.
  294: *     The first part stores the local scale factors. Each simultaneously
  295: *     computed right-hand side requires one local scale factor per block
  296: *     row. WORK( I + KK * LDS ) is the scale factor of the vector
  297: *     segment associated with the I-th block row and the KK-th vector
  298: *     in the block column.
  299:       LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
  300:       LDS = NBA
  301: *     The second part stores upper bounds of the triangular A. There are
  302: *     a total of NBA x NBA blocks, of which only the upper triangular
  303: *     part or the lower triangular part is referenced. The upper bound of
  304: *     the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
  305:       LANRM = NBA * NBA
  306:       AWRK = LSCALE
  307:       WORK( 1 ) = LSCALE + LANRM
  308: *
  309: *     Test the input parameters.
  310: *
  311:       IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  312:          INFO = -1
  313:       ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
  314:      $         LSAME( TRANS, 'C' ) ) THEN
  315:          INFO = -2
  316:       ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
  317:          INFO = -3
  318:       ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
  319:      $         LSAME( NORMIN, 'N' ) ) THEN
  320:          INFO = -4
  321:       ELSE IF( N.LT.0 ) THEN
  322:          INFO = -5
  323:       ELSE IF( NRHS.LT.0 ) THEN
  324:          INFO = -6
  325:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  326:          INFO = -8
  327:       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
  328:          INFO = -10
  329:       ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
  330:          INFO = -14
  331:       END IF
  332:       IF( INFO.NE.0 ) THEN
  333:          CALL XERBLA( 'ZLATRS3', -INFO )
  334:          RETURN
  335:       ELSE IF( LQUERY ) THEN
  336:          RETURN
  337:       END IF
  338: *
  339: *     Initialize scaling factors
  340: *
  341:       DO KK = 1, NRHS
  342:          SCALE( KK ) = ONE
  343:       END DO
  344: *
  345: *     Quick return if possible
  346: *
  347:       IF( MIN( N, NRHS ).EQ.0 )
  348:      $   RETURN
  349: *
  350: *     Determine machine dependent constant to control overflow.
  351: *
  352:       BIGNUM = DLAMCH( 'Overflow' )
  353:       SMLNUM = DLAMCH( 'Safe Minimum' )
  354: *
  355: *     Use unblocked code for small problems
  356: *
  357:       IF( NRHS.LT.NRHSMIN ) THEN
  358:          CALL ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
  359:      $                SCALE( 1 ), CNORM, INFO )
  360:          DO K = 2, NRHS
  361:             CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
  362:      $                   SCALE( K ), CNORM, INFO )
  363:          END DO
  364:          RETURN
  365:       END IF
  366: *
  367: *     Compute norms of blocks of A excluding diagonal blocks and find
  368: *     the block with the largest norm TMAX.
  369: *
  370:       TMAX = ZERO
  371:       DO J = 1, NBA
  372:          J1 = (J-1)*NB + 1
  373:          J2 = MIN( J*NB, N ) + 1
  374:          IF ( UPPER ) THEN
  375:             IFIRST = 1
  376:             ILAST = J - 1
  377:          ELSE
  378:             IFIRST = J + 1
  379:             ILAST = NBA
  380:          END IF
  381:          DO I = IFIRST, ILAST
  382:             I1 = (I-1)*NB + 1
  383:             I2 = MIN( I*NB, N ) + 1
  384: *
  385: *           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
  386: *
  387:             IF( NOTRAN ) THEN
  388:                ANRM = ZLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
  389:                WORK( AWRK + I+(J-1)*NBA ) = ANRM
  390:             ELSE
  391:                ANRM = ZLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
  392:                WORK( AWRK + J+(I-1) * NBA ) = ANRM
  393:             END IF
  394:             TMAX = MAX( TMAX, ANRM )
  395:          END DO
  396:       END DO
  397: *
  398:       IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
  399: *
  400: *        Some matrix entries have huge absolute value. At least one upper
  401: *        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
  402: *        number, either due to overflow in LANGE or due to Inf in A.
  403: *        Fall back to LATRS. Set normin = 'N' for every right-hand side to
  404: *        force computation of TSCAL in LATRS to avoid the likely overflow
  405: *        in the computation of the column norms CNORM.
  406: *
  407:          DO K = 1, NRHS
  408:             CALL ZLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
  409:      $                   SCALE( K ), CNORM, INFO )
  410:          END DO
  411:          RETURN
  412:       END IF
  413: *
  414: *     Every right-hand side requires workspace to store NBA local scale
  415: *     factors. To save workspace, X is computed successively in block columns
  416: *     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
  417: *     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
  418:       DO K = 1, NBX
  419: *        Loop over block columns (index = K) of X and, for column-wise scalings,
  420: *        over individual columns (index = KK).
  421: *        K1: column index of the first column in X( J, K )
  422: *        K2: column index of the first column in X( J, K+1 )
  423: *        so the K2 - K1 is the column count of the block X( J, K )
  424:          K1 = (K-1)*NBRHS + 1
  425:          K2 = MIN( K*NBRHS, NRHS ) + 1
  426: *
  427: *        Initialize local scaling factors of current block column X( J, K )
  428: *
  429:          DO KK = 1, K2 - K1
  430:             DO I = 1, NBA
  431:                WORK( I+KK*LDS ) = ONE
  432:             END DO
  433:          END DO
  434: *
  435:          IF( NOTRAN ) THEN
  436: *
  437: *           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
  438: *
  439:             IF( UPPER ) THEN
  440:                JFIRST = NBA
  441:                JLAST = 1
  442:                JINC = -1
  443:             ELSE
  444:                JFIRST = 1
  445:                JLAST = NBA
  446:                JINC = 1
  447:             END IF
  448:          ELSE
  449: *
  450: *           Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
  451: *           where op(A) = A**T or op(A) = A**H
  452: *
  453:             IF( UPPER ) THEN
  454:                JFIRST = 1
  455:                JLAST = NBA
  456:                JINC = 1
  457:             ELSE
  458:                JFIRST = NBA
  459:                JLAST = 1
  460:                JINC = -1
  461:             END IF
  462:          END IF
  463: 
  464:          DO J = JFIRST, JLAST, JINC
  465: *           J1: row index of the first row in A( J, J )
  466: *           J2: row index of the first row in A( J+1, J+1 )
  467: *           so that J2 - J1 is the row count of the block A( J, J )
  468:             J1 = (J-1)*NB + 1
  469:             J2 = MIN( J*NB, N ) + 1
  470: *
  471: *           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
  472: *
  473:             DO KK = 1, K2 - K1
  474:                RHS = K1 + KK - 1
  475:                IF( KK.EQ.1 ) THEN
  476:                   CALL ZLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
  477:      $                         A( J1, J1 ), LDA, X( J1, RHS ),
  478:      $                         SCALOC, CNORM, INFO )
  479:                ELSE
  480:                   CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
  481:      $                         A( J1, J1 ), LDA, X( J1, RHS ),
  482:      $                         SCALOC, CNORM, INFO )
  483:                END IF
  484: *              Find largest absolute value entry in the vector segment
  485: *              X( J1:J2-1, RHS ) as an upper bound for the worst case
  486: *              growth in the linear updates.
  487:                XNRM( KK ) = ZLANGE( 'I', J2-J1, 1, X( J1, RHS ),
  488:      $                              LDX, W )
  489: *
  490:                IF( SCALOC .EQ. ZERO ) THEN
  491: *                 LATRS found that A is singular through A(j,j) = 0.
  492: *                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
  493: *                 and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
  494: *                 set by LATRS.
  495:                   SCALE( RHS ) = ZERO
  496:                   DO II = 1, J1-1
  497:                      X( II, KK ) = CZERO
  498:                   END DO
  499:                   DO II = J2, N
  500:                      X( II, KK ) = CZERO
  501:                   END DO
  502: *                 Discard the local scale factors.
  503:                   DO II = 1, NBA
  504:                      WORK( II+KK*LDS ) = ONE
  505:                   END DO
  506:                   SCALOC = ONE
  507:                ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
  508: *                 LATRS computed a valid scale factor, but combined with
  509: *                 the current scaling the solution does not have a
  510: *                 scale factor > 0.
  511: *
  512: *                 Set WORK( J+KK*LDS ) to smallest valid scale
  513: *                 factor and increase SCALOC accordingly.
  514:                   SCAL = WORK( J+KK*LDS ) / SMLNUM
  515:                   SCALOC = SCALOC * SCAL
  516:                   WORK( J+KK*LDS ) = SMLNUM
  517: *                 If LATRS overestimated the growth, x may be
  518: *                 rescaled to preserve a valid combined scale
  519: *                 factor WORK( J, KK ) > 0.
  520:                   RSCAL = ONE / SCALOC
  521:                   IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
  522:                      XNRM( KK ) = XNRM( KK ) * RSCAL
  523:                      CALL ZDSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
  524:                      SCALOC = ONE
  525:                   ELSE
  526: *                    The system op(A) * x = b is badly scaled and its
  527: *                    solution cannot be represented as (1/scale) * x.
  528: *                    Set x to zero. This approach deviates from LATRS
  529: *                    where a completely meaningless non-zero vector
  530: *                    is returned that is not a solution to op(A) * x = b.
  531:                      SCALE( RHS ) = ZERO
  532:                      DO II = 1, N
  533:                         X( II, KK ) = CZERO
  534:                      END DO
  535: *                    Discard the local scale factors.
  536:                      DO II = 1, NBA
  537:                         WORK( II+KK*LDS ) = ONE
  538:                      END DO
  539:                      SCALOC = ONE
  540:                   END IF
  541:                END IF
  542:                SCALOC = SCALOC * WORK( J+KK*LDS )
  543:                WORK( J+KK*LDS ) = SCALOC
  544:             END DO
  545: *
  546: *           Linear block updates
  547: *
  548:             IF( NOTRAN ) THEN
  549:                IF( UPPER ) THEN
  550:                   IFIRST = J - 1
  551:                   ILAST = 1
  552:                   IINC = -1
  553:                ELSE
  554:                   IFIRST = J + 1
  555:                   ILAST = NBA
  556:                   IINC = 1
  557:                END IF
  558:             ELSE
  559:                IF( UPPER ) THEN
  560:                   IFIRST = J + 1
  561:                   ILAST = NBA
  562:                   IINC = 1
  563:                ELSE
  564:                   IFIRST = J - 1
  565:                   ILAST = 1
  566:                   IINC = -1
  567:                END IF
  568:             END IF
  569: *
  570:             DO I = IFIRST, ILAST, IINC
  571: *              I1: row index of the first column in X( I, K )
  572: *              I2: row index of the first column in X( I+1, K )
  573: *              so the I2 - I1 is the row count of the block X( I, K )
  574:                I1 = (I-1)*NB + 1
  575:                I2 = MIN( I*NB, N ) + 1
  576: *
  577: *              Prepare the linear update to be executed with GEMM.
  578: *              For each column, compute a consistent scaling, a
  579: *              scaling factor to survive the linear update, and
  580: *              rescale the column segments, if necesssary. Then
  581: *              the linear update is safely executed.
  582: *
  583:                DO KK = 1, K2 - K1
  584:                   RHS = K1 + KK - 1
  585: *                 Compute consistent scaling
  586:                   SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
  587: *
  588: *                 Compute scaling factor to survive the linear update
  589: *                 simulating consistent scaling.
  590: *
  591:                   BNRM = ZLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
  592:                   BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
  593:                   XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
  594:                   ANRM = WORK( AWRK + I+(J-1)*NBA )
  595:                   SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
  596: *
  597: *                 Simultaneously apply the robust update factor and the
  598: *                 consistency scaling factor to X( I, KK ) and X( J, KK ).
  599: *
  600:                   SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
  601:                   IF( SCAL.NE.ONE ) THEN
  602:                      CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
  603:                      WORK( I+KK*LDS ) = SCAMIN*SCALOC
  604:                   END IF
  605: *
  606:                   SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
  607:                   IF( SCAL.NE.ONE ) THEN
  608:                      CALL ZDSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
  609:                      WORK( J+KK*LDS ) = SCAMIN*SCALOC
  610:                   END IF
  611:                END DO
  612: *
  613:                IF( NOTRAN ) THEN
  614: *
  615: *                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
  616: *
  617:                   CALL ZGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  618:      $                        A( I1, J1 ), LDA, X( J1, K1 ), LDX,
  619:      $                        CONE, X( I1, K1 ), LDX )
  620:                ELSE IF( LSAME( TRANS, 'T' ) ) THEN
  621: *
  622: *                 B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
  623: *
  624:                   CALL ZGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  625:      $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
  626:      $                        CONE, X( I1, K1 ), LDX )
  627:                ELSE
  628: *
  629: *                 B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
  630: *
  631:                   CALL ZGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  632:      $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
  633:      $                        CONE, X( I1, K1 ), LDX )
  634:                END IF
  635:             END DO
  636:          END DO
  637: 
  638: *
  639: *        Reduce local scaling factors
  640: *
  641:          DO KK = 1, K2 - K1
  642:             RHS = K1 + KK - 1
  643:             DO I = 1, NBA
  644:                SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
  645:             END DO
  646:          END DO
  647: *
  648: *        Realize consistent scaling
  649: *
  650:          DO KK = 1, K2 - K1
  651:             RHS = K1 + KK - 1
  652:             IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
  653:                DO I = 1, NBA
  654:                   I1 = (I - 1) * NB + 1
  655:                   I2 = MIN( I * NB, N ) + 1
  656:                   SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
  657:                   IF( SCAL.NE.ONE )
  658:      $               CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
  659:                END DO
  660:             END IF
  661:          END DO
  662:       END DO
  663:       RETURN
  664: *
  665: *     End of ZLATRS3
  666: *
  667:       END

CVSweb interface <joel.bertrand@systella.fr>