rpl/lapack/lapack/dsyevd_2stage.f - view

Return to dsyevd_2stage.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / dsyevd_2stage.f
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Tue May 29 07:18:08 2018 UTC (7 years, 1 month ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_31, rpl-4_1_30, rpl-4_1_29, rpl-4_1_28, HEAD

Mise à jour de Lapack.

    1: *> \brief <b> DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b>
    2: *
    3: *  @precisions fortran d -> s
    4: *
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at
    8: *            http://www.netlib.org/lapack/explore-html/
    9: *
   10: *> \htmlonly
   11: *> Download DSYEVD_2STAGE + dependencies
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   13: *> [TGZ]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   15: *> [ZIP]</a>
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   17: *> [TXT]</a>
   18: *> \endhtmlonly
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
   24: *                                IWORK, LIWORK, INFO )
   25: *
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER          JOBZ, UPLO
   30: *       INTEGER            INFO, LDA, LIWORK, LWORK, N
   31: *       ..
   32: *       .. Array Arguments ..
   33: *       INTEGER            IWORK( * )
   34: *       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
   35: *       ..
   36: *
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
   44: *> real symmetric matrix A using the 2stage technique for
   45: *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
   46: *> divide and conquer algorithm.
   47: *>
   48: *> The divide and conquer algorithm makes very mild assumptions about
   49: *> floating point arithmetic. It will work on machines with a guard
   50: *> digit in add/subtract, or on those binary machines without guard
   51: *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
   52: *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
   53: *> without guard digits, but we know of none.
   54: *> \endverbatim
   55: *
   56: *  Arguments:
   57: *  ==========
   58: *
   59: *> \param[in] JOBZ
   60: *> \verbatim
   61: *>          JOBZ is CHARACTER*1
   62: *>          = 'N':  Compute eigenvalues only;
   63: *>          = 'V':  Compute eigenvalues and eigenvectors.
   64: *>                  Not available in this release.
   65: *> \endverbatim
   66: *>
   67: *> \param[in] UPLO
   68: *> \verbatim
   69: *>          UPLO is CHARACTER*1
   70: *>          = 'U':  Upper triangle of A is stored;
   71: *>          = 'L':  Lower triangle of A is stored.
   72: *> \endverbatim
   73: *>
   74: *> \param[in] N
   75: *> \verbatim
   76: *>          N is INTEGER
   77: *>          The order of the matrix A.  N >= 0.
   78: *> \endverbatim
   79: *>
   80: *> \param[in,out] A
   81: *> \verbatim
   82: *>          A is DOUBLE PRECISION array, dimension (LDA, N)
   83: *>          On entry, the symmetric matrix A.  If UPLO = 'U', the
   84: *>          leading N-by-N upper triangular part of A contains the
   85: *>          upper triangular part of the matrix A.  If UPLO = 'L',
   86: *>          the leading N-by-N lower triangular part of A contains
   87: *>          the lower triangular part of the matrix A.
   88: *>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
   89: *>          orthonormal eigenvectors of the matrix A.
   90: *>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
   91: *>          or the upper triangle (if UPLO='U') of A, including the
   92: *>          diagonal, is destroyed.
   93: *> \endverbatim
   94: *>
   95: *> \param[in] LDA
   96: *> \verbatim
   97: *>          LDA is INTEGER
   98: *>          The leading dimension of the array A.  LDA >= max(1,N).
   99: *> \endverbatim
  100: *>
  101: *> \param[out] W
  102: *> \verbatim
  103: *>          W is DOUBLE PRECISION array, dimension (N)
  104: *>          If INFO = 0, the eigenvalues in ascending order.
  105: *> \endverbatim
  106: *>
  107: *> \param[out] WORK
  108: *> \verbatim
  109: *>          WORK is DOUBLE PRECISION array,
  110: *>                                         dimension (LWORK)
  111: *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  112: *> \endverbatim
  113: *>
  114: *> \param[in] LWORK
  115: *> \verbatim
  116: *>          LWORK is INTEGER
  117: *>          The dimension of the array WORK.
  118: *>          If N <= 1,               LWORK must be at least 1.
  119: *>          If JOBZ = 'N' and N > 1, LWORK must be queried.
  120: *>                                   LWORK = MAX(1, dimension) where
  121: *>                                   dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1
  122: *>                                             = N*KD + N*max(KD+1,FACTOPTNB) 
  123: *>                                               + max(2*KD*KD, KD*NTHREADS) 
  124: *>                                               + (KD+1)*N + 2*N+1
  125: *>                                   where KD is the blocking size of the reduction,
  126: *>                                   FACTOPTNB is the blocking used by the QR or LQ
  127: *>                                   algorithm, usually FACTOPTNB=128 is a good choice
  128: *>                                   NTHREADS is the number of threads used when
  129: *>                                   openMP compilation is enabled, otherwise =1.
  130: *>          If JOBZ = 'V' and N > 1, LWORK must be at least
  131: *>                                                1 + 6*N + 2*N**2.
  132: *>
  133: *>          If LWORK = -1, then a workspace query is assumed; the routine
  134: *>          only calculates the optimal sizes of the WORK and IWORK
  135: *>          arrays, returns these values as the first entries of the WORK
  136: *>          and IWORK arrays, and no error message related to LWORK or
  137: *>          LIWORK is issued by XERBLA.
  138: *> \endverbatim
  139: *>
  140: *> \param[out] IWORK
  141: *> \verbatim
  142: *>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
  143: *>          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
  144: *> \endverbatim
  145: *>
  146: *> \param[in] LIWORK
  147: *> \verbatim
  148: *>          LIWORK is INTEGER
  149: *>          The dimension of the array IWORK.
  150: *>          If N <= 1,                LIWORK must be at least 1.
  151: *>          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
  152: *>          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
  153: *>
  154: *>          If LIWORK = -1, then a workspace query is assumed; the
  155: *>          routine only calculates the optimal sizes of the WORK and
  156: *>          IWORK arrays, returns these values as the first entries of
  157: *>          the WORK and IWORK arrays, and no error message related to
  158: *>          LWORK or LIWORK is issued by XERBLA.
  159: *> \endverbatim
  160: *>
  161: *> \param[out] INFO
  162: *> \verbatim
  163: *>          INFO is INTEGER
  164: *>          = 0:  successful exit
  165: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  166: *>          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed
  167: *>                to converge; i off-diagonal elements of an intermediate
  168: *>                tridiagonal form did not converge to zero;
  169: *>                if INFO = i and JOBZ = 'V', then the algorithm failed
  170: *>                to compute an eigenvalue while working on the submatrix
  171: *>                lying in rows and columns INFO/(N+1) through
  172: *>                mod(INFO,N+1).
  173: *> \endverbatim
  174: *
  175: *  Authors:
  176: *  ========
  177: *
  178: *> \author Univ. of Tennessee
  179: *> \author Univ. of California Berkeley
  180: *> \author Univ. of Colorado Denver
  181: *> \author NAG Ltd.
  182: *
  183: *> \date November 2017
  184: *
  185: *> \ingroup doubleSYeigen
  186: *
  187: *> \par Contributors:
  188: *  ==================
  189: *>
  190: *> Jeff Rutter, Computer Science Division, University of California
  191: *> at Berkeley, USA \n
  192: *>  Modified by Francoise Tisseur, University of Tennessee \n
  193: *>  Modified description of INFO. Sven, 16 Feb 05. \n
  194: *> \par Further Details:
  195: *  =====================
  196: *>
  197: *> \verbatim
  198: *>
  199: *>  All details about the 2stage techniques are available in:
  200: *>
  201: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  202: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  203: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  204: *>  of 2011 International Conference for High Performance Computing,
  205: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  206: *>  Article 8 , 11 pages.
  207: *>  http://doi.acm.org/10.1145/2063384.2063394
  208: *>
  209: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  210: *>  An improved parallel singular value algorithm and its implementation 
  211: *>  for multicore hardware, In Proceedings of 2013 International Conference
  212: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  213: *>  Denver, Colorado, USA, 2013.
  214: *>  Article 90, 12 pages.
  215: *>  http://doi.acm.org/10.1145/2503210.2503292
  216: *>
  217: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  218: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  219: *>  calculations based on fine-grained memory aware tasks.
  220: *>  International Journal of High Performance Computing Applications.
  221: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  222: *>  http://hpc.sagepub.com/content/28/2/196 
  223: *>
  224: *> \endverbatim
  225: *
  226: *  =====================================================================
  227:       SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
  228:      $                          IWORK, LIWORK, INFO )
  229: *
  230:       IMPLICIT NONE
  231: *
  232: *  -- LAPACK driver routine (version 3.8.0) --
  233: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  234: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  235: *     November 2017
  236: *
  237: *     .. Scalar Arguments ..
  238:       CHARACTER          JOBZ, UPLO
  239:       INTEGER            INFO, LDA, LIWORK, LWORK, N
  240: *     ..
  241: *     .. Array Arguments ..
  242:       INTEGER            IWORK( * )
  243:       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
  244: *     ..
  245: *
  246: *  =====================================================================
  247: *
  248: *     .. Parameters ..
  249:       DOUBLE PRECISION   ZERO, ONE
  250:       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
  251: *     ..
  252: *     .. Local Scalars ..
  253: *
  254:       LOGICAL            LOWER, LQUERY, WANTZ
  255:       INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
  256:      $                   LIWMIN, LLWORK, LLWRK2, LWMIN,
  257:      $                   LHTRD, LWTRD, KD, IB, INDHOUS
  258:       DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
  259:      $                   SMLNUM
  260: *     ..
  261: *     .. External Functions ..
  262:       LOGICAL            LSAME
  263:       INTEGER            ILAENV2STAGE
  264:       DOUBLE PRECISION   DLAMCH, DLANSY
  265:       EXTERNAL           LSAME, DLAMCH, DLANSY, ILAENV2STAGE
  266: *     ..
  267: *     .. External Subroutines ..
  268:       EXTERNAL           DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF,
  269:      $                   DSYTRD_2STAGE, XERBLA
  270: *     ..
  271: *     .. Intrinsic Functions ..
  272:       INTRINSIC          MAX, SQRT
  273: *     ..
  274: *     .. Executable Statements ..
  275: *
  276: *     Test the input parameters.
  277: *
  278:       WANTZ = LSAME( JOBZ, 'V' )
  279:       LOWER = LSAME( UPLO, 'L' )
  280:       LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
  281: *
  282:       INFO = 0
  283:       IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
  284:          INFO = -1
  285:       ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
  286:          INFO = -2
  287:       ELSE IF( N.LT.0 ) THEN
  288:          INFO = -3
  289:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  290:          INFO = -5
  291:       END IF
  292: *
  293:       IF( INFO.EQ.0 ) THEN
  294:          IF( N.LE.1 ) THEN
  295:             LIWMIN = 1
  296:             LWMIN = 1
  297:          ELSE
  298:             KD    = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ,
  299:      $                            N, -1, -1, -1 )
  300:             IB    = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ,
  301:      $                            N, KD, -1, -1 )
  302:             LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ,
  303:      $                            N, KD, IB, -1 )
  304:             LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ,
  305:      $                            N, KD, IB, -1 )
  306:             IF( WANTZ ) THEN
  307:                LIWMIN = 3 + 5*N
  308:                LWMIN = 1 + 6*N + 2*N**2
  309:             ELSE
  310:                LIWMIN = 1
  311:                LWMIN = 2*N + 1 + LHTRD + LWTRD
  312:             END IF
  313:          END IF
  314:          WORK( 1 )  = LWMIN
  315:          IWORK( 1 ) = LIWMIN
  316: *
  317:          IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  318:             INFO = -8
  319:          ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
  320:             INFO = -10
  321:          END IF
  322:       END IF
  323: *
  324:       IF( INFO.NE.0 ) THEN
  325:          CALL XERBLA( 'DSYEVD_2STAGE', -INFO )
  326:          RETURN
  327:       ELSE IF( LQUERY ) THEN
  328:          RETURN
  329:       END IF
  330: *
  331: *     Quick return if possible
  332: *
  333:       IF( N.EQ.0 )
  334:      $   RETURN
  335: *
  336:       IF( N.EQ.1 ) THEN
  337:          W( 1 ) = A( 1, 1 )
  338:          IF( WANTZ )
  339:      $      A( 1, 1 ) = ONE
  340:          RETURN
  341:       END IF
  342: *
  343: *     Get machine constants.
  344: *
  345:       SAFMIN = DLAMCH( 'Safe minimum' )
  346:       EPS    = DLAMCH( 'Precision' )
  347:       SMLNUM = SAFMIN / EPS
  348:       BIGNUM = ONE / SMLNUM
  349:       RMIN   = SQRT( SMLNUM )
  350:       RMAX   = SQRT( BIGNUM )
  351: *
  352: *     Scale matrix to allowable range, if necessary.
  353: *
  354:       ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
  355:       ISCALE = 0
  356:       IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
  357:          ISCALE = 1
  358:          SIGMA = RMIN / ANRM
  359:       ELSE IF( ANRM.GT.RMAX ) THEN
  360:          ISCALE = 1
  361:          SIGMA = RMAX / ANRM
  362:       END IF
  363:       IF( ISCALE.EQ.1 )
  364:      $   CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
  365: *
  366: *     Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form.
  367: *
  368:       INDE    = 1
  369:       INDTAU  = INDE + N
  370:       INDHOUS = INDTAU + N
  371:       INDWRK  = INDHOUS + LHTRD
  372:       LLWORK  = LWORK - INDWRK + 1
  373:       INDWK2  = INDWRK + N*N
  374:       LLWRK2  = LWORK - INDWK2 + 1
  375: *
  376:       CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ),
  377:      $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD, 
  378:      $                    WORK( INDWRK ), LLWORK, IINFO )
  379: *
  380: *     For eigenvalues only, call DSTERF.  For eigenvectors, first call
  381: *     DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
  382: *     tridiagonal matrix, then call DORMTR to multiply it by the
  383: *     Householder transformations stored in A.
  384: *
  385:       IF( .NOT.WANTZ ) THEN
  386:          CALL DSTERF( N, W, WORK( INDE ), INFO )
  387:       ELSE
  388: *        Not available in this release, and agrument checking should not
  389: *        let it getting here
  390:          RETURN
  391:          CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
  392:      $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
  393:          CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
  394:      $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
  395:          CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
  396:       END IF
  397: *
  398: *     If matrix was scaled, then rescale eigenvalues appropriately.
  399: *
  400:       IF( ISCALE.EQ.1 )
  401:      $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
  402: *
  403:       WORK( 1 )  = LWMIN
  404:       IWORK( 1 ) = LIWMIN
  405: *
  406:       RETURN
  407: *
  408: *     End of DSYEVD_2STAGE
  409: *
  410:       END

CVSweb interface <joel.bertrand@systella.fr>