rpl/lapack/lapack/dsyevd_2stage.f - view

Return to dsyevd_2stage.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / dsyevd_2stage.f
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:08 2023 UTC (23 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD

Première mise à jour de lapack et blas.

    1: *> \brief <b> DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b>
    2: *
    3: *  @precisions fortran d -> s
    4: *
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at
    8: *            http://www.netlib.org/lapack/explore-html/
    9: *
   10: *> \htmlonly
   11: *> Download DSYEVD_2STAGE + dependencies
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   13: *> [TGZ]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   15: *> [ZIP]</a>
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f">
   17: *> [TXT]</a>
   18: *> \endhtmlonly
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
   24: *                                IWORK, LIWORK, INFO )
   25: *
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER          JOBZ, UPLO
   30: *       INTEGER            INFO, LDA, LIWORK, LWORK, N
   31: *       ..
   32: *       .. Array Arguments ..
   33: *       INTEGER            IWORK( * )
   34: *       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
   35: *       ..
   36: *
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
   44: *> real symmetric matrix A using the 2stage technique for
   45: *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
   46: *> divide and conquer algorithm.
   47: *>
   48: *> The divide and conquer algorithm makes very mild assumptions about
   49: *> floating point arithmetic. It will work on machines with a guard
   50: *> digit in add/subtract, or on those binary machines without guard
   51: *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
   52: *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
   53: *> without guard digits, but we know of none.
   54: *> \endverbatim
   55: *
   56: *  Arguments:
   57: *  ==========
   58: *
   59: *> \param[in] JOBZ
   60: *> \verbatim
   61: *>          JOBZ is CHARACTER*1
   62: *>          = 'N':  Compute eigenvalues only;
   63: *>          = 'V':  Compute eigenvalues and eigenvectors.
   64: *>                  Not available in this release.
   65: *> \endverbatim
   66: *>
   67: *> \param[in] UPLO
   68: *> \verbatim
   69: *>          UPLO is CHARACTER*1
   70: *>          = 'U':  Upper triangle of A is stored;
   71: *>          = 'L':  Lower triangle of A is stored.
   72: *> \endverbatim
   73: *>
   74: *> \param[in] N
   75: *> \verbatim
   76: *>          N is INTEGER
   77: *>          The order of the matrix A.  N >= 0.
   78: *> \endverbatim
   79: *>
   80: *> \param[in,out] A
   81: *> \verbatim
   82: *>          A is DOUBLE PRECISION array, dimension (LDA, N)
   83: *>          On entry, the symmetric matrix A.  If UPLO = 'U', the
   84: *>          leading N-by-N upper triangular part of A contains the
   85: *>          upper triangular part of the matrix A.  If UPLO = 'L',
   86: *>          the leading N-by-N lower triangular part of A contains
   87: *>          the lower triangular part of the matrix A.
   88: *>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
   89: *>          orthonormal eigenvectors of the matrix A.
   90: *>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
   91: *>          or the upper triangle (if UPLO='U') of A, including the
   92: *>          diagonal, is destroyed.
   93: *> \endverbatim
   94: *>
   95: *> \param[in] LDA
   96: *> \verbatim
   97: *>          LDA is INTEGER
   98: *>          The leading dimension of the array A.  LDA >= max(1,N).
   99: *> \endverbatim
  100: *>
  101: *> \param[out] W
  102: *> \verbatim
  103: *>          W is DOUBLE PRECISION array, dimension (N)
  104: *>          If INFO = 0, the eigenvalues in ascending order.
  105: *> \endverbatim
  106: *>
  107: *> \param[out] WORK
  108: *> \verbatim
  109: *>          WORK is DOUBLE PRECISION array,
  110: *>                                         dimension (LWORK)
  111: *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  112: *> \endverbatim
  113: *>
  114: *> \param[in] LWORK
  115: *> \verbatim
  116: *>          LWORK is INTEGER
  117: *>          The dimension of the array WORK.
  118: *>          If N <= 1,               LWORK must be at least 1.
  119: *>          If JOBZ = 'N' and N > 1, LWORK must be queried.
  120: *>                                   LWORK = MAX(1, dimension) where
  121: *>                                   dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1
  122: *>                                             = N*KD + N*max(KD+1,FACTOPTNB) 
  123: *>                                               + max(2*KD*KD, KD*NTHREADS) 
  124: *>                                               + (KD+1)*N + 2*N+1
  125: *>                                   where KD is the blocking size of the reduction,
  126: *>                                   FACTOPTNB is the blocking used by the QR or LQ
  127: *>                                   algorithm, usually FACTOPTNB=128 is a good choice
  128: *>                                   NTHREADS is the number of threads used when
  129: *>                                   openMP compilation is enabled, otherwise =1.
  130: *>          If JOBZ = 'V' and N > 1, LWORK must be at least
  131: *>                                                1 + 6*N + 2*N**2.
  132: *>
  133: *>          If LWORK = -1, then a workspace query is assumed; the routine
  134: *>          only calculates the optimal sizes of the WORK and IWORK
  135: *>          arrays, returns these values as the first entries of the WORK
  136: *>          and IWORK arrays, and no error message related to LWORK or
  137: *>          LIWORK is issued by XERBLA.
  138: *> \endverbatim
  139: *>
  140: *> \param[out] IWORK
  141: *> \verbatim
  142: *>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
  143: *>          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
  144: *> \endverbatim
  145: *>
  146: *> \param[in] LIWORK
  147: *> \verbatim
  148: *>          LIWORK is INTEGER
  149: *>          The dimension of the array IWORK.
  150: *>          If N <= 1,                LIWORK must be at least 1.
  151: *>          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
  152: *>          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
  153: *>
  154: *>          If LIWORK = -1, then a workspace query is assumed; the
  155: *>          routine only calculates the optimal sizes of the WORK and
  156: *>          IWORK arrays, returns these values as the first entries of
  157: *>          the WORK and IWORK arrays, and no error message related to
  158: *>          LWORK or LIWORK is issued by XERBLA.
  159: *> \endverbatim
  160: *>
  161: *> \param[out] INFO
  162: *> \verbatim
  163: *>          INFO is INTEGER
  164: *>          = 0:  successful exit
  165: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  166: *>          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed
  167: *>                to converge; i off-diagonal elements of an intermediate
  168: *>                tridiagonal form did not converge to zero;
  169: *>                if INFO = i and JOBZ = 'V', then the algorithm failed
  170: *>                to compute an eigenvalue while working on the submatrix
  171: *>                lying in rows and columns INFO/(N+1) through
  172: *>                mod(INFO,N+1).
  173: *> \endverbatim
  174: *
  175: *  Authors:
  176: *  ========
  177: *
  178: *> \author Univ. of Tennessee
  179: *> \author Univ. of California Berkeley
  180: *> \author Univ. of Colorado Denver
  181: *> \author NAG Ltd.
  182: *
  183: *> \ingroup doubleSYeigen
  184: *
  185: *> \par Contributors:
  186: *  ==================
  187: *>
  188: *> Jeff Rutter, Computer Science Division, University of California
  189: *> at Berkeley, USA \n
  190: *>  Modified by Francoise Tisseur, University of Tennessee \n
  191: *>  Modified description of INFO. Sven, 16 Feb 05. \n
  192: *> \par Further Details:
  193: *  =====================
  194: *>
  195: *> \verbatim
  196: *>
  197: *>  All details about the 2stage techniques are available in:
  198: *>
  199: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  200: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  201: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  202: *>  of 2011 International Conference for High Performance Computing,
  203: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  204: *>  Article 8 , 11 pages.
  205: *>  http://doi.acm.org/10.1145/2063384.2063394
  206: *>
  207: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  208: *>  An improved parallel singular value algorithm and its implementation 
  209: *>  for multicore hardware, In Proceedings of 2013 International Conference
  210: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  211: *>  Denver, Colorado, USA, 2013.
  212: *>  Article 90, 12 pages.
  213: *>  http://doi.acm.org/10.1145/2503210.2503292
  214: *>
  215: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  216: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  217: *>  calculations based on fine-grained memory aware tasks.
  218: *>  International Journal of High Performance Computing Applications.
  219: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  220: *>  http://hpc.sagepub.com/content/28/2/196 
  221: *>
  222: *> \endverbatim
  223: *
  224: *  =====================================================================
  225:       SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
  226:      $                          IWORK, LIWORK, INFO )
  227: *
  228:       IMPLICIT NONE
  229: *
  230: *  -- LAPACK driver routine --
  231: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  232: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  233: *
  234: *     .. Scalar Arguments ..
  235:       CHARACTER          JOBZ, UPLO
  236:       INTEGER            INFO, LDA, LIWORK, LWORK, N
  237: *     ..
  238: *     .. Array Arguments ..
  239:       INTEGER            IWORK( * )
  240:       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
  241: *     ..
  242: *
  243: *  =====================================================================
  244: *
  245: *     .. Parameters ..
  246:       DOUBLE PRECISION   ZERO, ONE
  247:       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
  248: *     ..
  249: *     .. Local Scalars ..
  250: *
  251:       LOGICAL            LOWER, LQUERY, WANTZ
  252:       INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
  253:      $                   LIWMIN, LLWORK, LLWRK2, LWMIN,
  254:      $                   LHTRD, LWTRD, KD, IB, INDHOUS
  255:       DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
  256:      $                   SMLNUM
  257: *     ..
  258: *     .. External Functions ..
  259:       LOGICAL            LSAME
  260:       INTEGER            ILAENV2STAGE
  261:       DOUBLE PRECISION   DLAMCH, DLANSY
  262:       EXTERNAL           LSAME, DLAMCH, DLANSY, ILAENV2STAGE
  263: *     ..
  264: *     .. External Subroutines ..
  265:       EXTERNAL           DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF,
  266:      $                   DSYTRD_2STAGE, XERBLA
  267: *     ..
  268: *     .. Intrinsic Functions ..
  269:       INTRINSIC          MAX, SQRT
  270: *     ..
  271: *     .. Executable Statements ..
  272: *
  273: *     Test the input parameters.
  274: *
  275:       WANTZ = LSAME( JOBZ, 'V' )
  276:       LOWER = LSAME( UPLO, 'L' )
  277:       LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
  278: *
  279:       INFO = 0
  280:       IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
  281:          INFO = -1
  282:       ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
  283:          INFO = -2
  284:       ELSE IF( N.LT.0 ) THEN
  285:          INFO = -3
  286:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  287:          INFO = -5
  288:       END IF
  289: *
  290:       IF( INFO.EQ.0 ) THEN
  291:          IF( N.LE.1 ) THEN
  292:             LIWMIN = 1
  293:             LWMIN = 1
  294:          ELSE
  295:             KD    = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ,
  296:      $                            N, -1, -1, -1 )
  297:             IB    = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ,
  298:      $                            N, KD, -1, -1 )
  299:             LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ,
  300:      $                            N, KD, IB, -1 )
  301:             LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ,
  302:      $                            N, KD, IB, -1 )
  303:             IF( WANTZ ) THEN
  304:                LIWMIN = 3 + 5*N
  305:                LWMIN = 1 + 6*N + 2*N**2
  306:             ELSE
  307:                LIWMIN = 1
  308:                LWMIN = 2*N + 1 + LHTRD + LWTRD
  309:             END IF
  310:          END IF
  311:          WORK( 1 )  = LWMIN
  312:          IWORK( 1 ) = LIWMIN
  313: *
  314:          IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  315:             INFO = -8
  316:          ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
  317:             INFO = -10
  318:          END IF
  319:       END IF
  320: *
  321:       IF( INFO.NE.0 ) THEN
  322:          CALL XERBLA( 'DSYEVD_2STAGE', -INFO )
  323:          RETURN
  324:       ELSE IF( LQUERY ) THEN
  325:          RETURN
  326:       END IF
  327: *
  328: *     Quick return if possible
  329: *
  330:       IF( N.EQ.0 )
  331:      $   RETURN
  332: *
  333:       IF( N.EQ.1 ) THEN
  334:          W( 1 ) = A( 1, 1 )
  335:          IF( WANTZ )
  336:      $      A( 1, 1 ) = ONE
  337:          RETURN
  338:       END IF
  339: *
  340: *     Get machine constants.
  341: *
  342:       SAFMIN = DLAMCH( 'Safe minimum' )
  343:       EPS    = DLAMCH( 'Precision' )
  344:       SMLNUM = SAFMIN / EPS
  345:       BIGNUM = ONE / SMLNUM
  346:       RMIN   = SQRT( SMLNUM )
  347:       RMAX   = SQRT( BIGNUM )
  348: *
  349: *     Scale matrix to allowable range, if necessary.
  350: *
  351:       ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
  352:       ISCALE = 0
  353:       IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
  354:          ISCALE = 1
  355:          SIGMA = RMIN / ANRM
  356:       ELSE IF( ANRM.GT.RMAX ) THEN
  357:          ISCALE = 1
  358:          SIGMA = RMAX / ANRM
  359:       END IF
  360:       IF( ISCALE.EQ.1 )
  361:      $   CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
  362: *
  363: *     Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form.
  364: *
  365:       INDE    = 1
  366:       INDTAU  = INDE + N
  367:       INDHOUS = INDTAU + N
  368:       INDWRK  = INDHOUS + LHTRD
  369:       LLWORK  = LWORK - INDWRK + 1
  370:       INDWK2  = INDWRK + N*N
  371:       LLWRK2  = LWORK - INDWK2 + 1
  372: *
  373:       CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ),
  374:      $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD, 
  375:      $                    WORK( INDWRK ), LLWORK, IINFO )
  376: *
  377: *     For eigenvalues only, call DSTERF.  For eigenvectors, first call
  378: *     DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
  379: *     tridiagonal matrix, then call DORMTR to multiply it by the
  380: *     Householder transformations stored in A.
  381: *
  382:       IF( .NOT.WANTZ ) THEN
  383:          CALL DSTERF( N, W, WORK( INDE ), INFO )
  384:       ELSE
  385: *        Not available in this release, and argument checking should not
  386: *        let it getting here
  387:          RETURN
  388:          CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
  389:      $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
  390:          CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
  391:      $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
  392:          CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
  393:       END IF
  394: *
  395: *     If matrix was scaled, then rescale eigenvalues appropriately.
  396: *
  397:       IF( ISCALE.EQ.1 )
  398:      $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
  399: *
  400:       WORK( 1 )  = LWMIN
  401:       IWORK( 1 ) = LIWMIN
  402: *
  403:       RETURN
  404: *
  405: *     End of DSYEVD_2STAGE
  406: *
  407:       END

CVSweb interface <joel.bertrand@systella.fr>